diff --git a/AudioQnA/benchmark/accuracy/README.md b/AudioQnA/benchmark/accuracy/README.md new file mode 100644 index 000000000..67119121a --- /dev/null +++ b/AudioQnA/benchmark/accuracy/README.md @@ -0,0 +1,51 @@ +# AudioQnA accuracy Evaluation + +AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio, which includes Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the pipeline for evaluating the ASR accuracy. + +## Dataset + +We evaluate the ASR accuracy on the test set of librispeech [dataset](https://huggingface.co/datasets/andreagasparini/librispeech_test_only), which contains 2620 records of audio and texts. + +## Metrics + +We evaluate the WER (Word Error Rate) metric of the ASR microservice. + +## Evaluation + +### Launch ASR microservice + +Launch the ASR microservice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr). + +```bash +git clone https://github.com/opea-project/GenAIComps +cd GenAIComps +docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile . 
+# to evaluate a different model, change the value of --model_name_or_path +docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest --model_name_or_path "openai/whisper-tiny" +``` + +### Evaluate + +Install dependencies: + +``` +pip install -r requirements.txt +``` + +Evaluate the ASR accuracy: + +```bash +# validate the offline model +# python local_eval.py +# validate the online asr microservice accuracy +python online_eval.py +``` + +### Performance Result + +Here are the tested results for your reference: +|| WER | +| --- | ---- | +|whisper-large-v2| 2.87| +|whisper-large| 2.7 | +|whisper-medium| 3.45 | diff --git a/AudioQnA/benchmark/accuracy/local_eval.py b/AudioQnA/benchmark/accuracy/local_eval.py new file mode 100644 index 000000000..1ef7b6dfa --- /dev/null +++ b/AudioQnA/benchmark/accuracy/local_eval.py @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import torch +from datasets import load_dataset +from evaluate import load +from transformers import WhisperForConditionalGeneration, WhisperProcessor + +device = "cuda" if torch.cuda.is_available() else "cpu" + +MODEL_NAME = "openai/whisper-large-v2" + +librispeech_test_clean = load_dataset( + "andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True +) +processor = WhisperProcessor.from_pretrained(MODEL_NAME) +model = WhisperForConditionalGeneration.from_pretrained(MODEL_NAME).to(device) + + +def map_to_pred(batch): + audio = batch["audio"] + input_features = processor(audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt").input_features + batch["reference"] = processor.tokenizer._normalize(batch["text"]) + + with torch.no_grad(): + predicted_ids = model.generate(input_features.to(device))[0] + transcription = processor.decode(predicted_ids) + batch["prediction"] = processor.tokenizer._normalize(transcription) + return batch + + +result 
= librispeech_test_clean.map(map_to_pred) + +wer = load("wer") +print(100 * wer.compute(references=result["reference"], predictions=result["prediction"])) diff --git a/AudioQnA/benchmark/accuracy/online_eval.py b/AudioQnA/benchmark/accuracy/online_eval.py new file mode 100644 index 000000000..a7854c95b --- /dev/null +++ b/AudioQnA/benchmark/accuracy/online_eval.py @@ -0,0 +1,56 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import json + +import requests +import torch +from datasets import load_dataset +from evaluate import load +from pydub import AudioSegment +from transformers import WhisperForConditionalGeneration, WhisperProcessor + +MODEL_NAME = "openai/whisper-large-v2" +processor = WhisperProcessor.from_pretrained(MODEL_NAME) + +librispeech_test_clean = load_dataset( + "andreagasparini/librispeech_test_only", "clean", split="test", trust_remote_code=True +) + + +def map_to_pred(batch): + batch["reference"] = processor.tokenizer._normalize(batch["text"]) + + file_path = batch["file"] + # process the file_path + pidx = file_path.rfind("/") + sidx = file_path.rfind(".") + + file_path_prefix = file_path[: pidx + 1] + file_path_suffix = file_path[sidx:] + file_path_mid = file_path[pidx + 1 : sidx] + splits = file_path_mid.split("-") + file_path_mid = f"LibriSpeech/test-clean/{splits[0]}/{splits[1]}/{file_path_mid}" + + file_path = file_path_prefix + file_path_mid + file_path_suffix + + audio = AudioSegment.from_file(file_path) + audio.export("tmp.wav") + with open("tmp.wav", "rb") as f: + test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8") + + inputs = {"audio": test_audio_base64_str} + endpoint = "http://localhost:7066/v1/asr" + response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}) + + result_str = response.json()["asr_result"] + + batch["prediction"] = processor.tokenizer._normalize(result_str) + return batch + + +result = 
librispeech_test_clean.map(map_to_pred) + +wer = load("wer") +print(100 * wer.compute(references=result["reference"], predictions=result["prediction"])) diff --git a/AudioQnA/benchmark/accuracy/requirements.txt b/AudioQnA/benchmark/accuracy/requirements.txt new file mode 100644 index 000000000..c3f6c51a1 --- /dev/null +++ b/AudioQnA/benchmark/accuracy/requirements.txt @@ -0,0 +1,8 @@ +datasets +evaluate +jiwer +librosa +pydub +soundfile +torch +transformers diff --git a/AudioQnA/docker_compose/intel/cpu/xeon/README.md b/AudioQnA/docker_compose/intel/cpu/xeon/README.md index 338771dd0..d08061284 100644 --- a/AudioQnA/docker_compose/intel/cpu/xeon/README.md +++ b/AudioQnA/docker_compose/intel/cpu/xeon/README.md @@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \ # llm microservice curl http://${host_ip}:3007/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ -H 'Content-Type: application/json' # speecht5 service diff --git a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md index 28ec3f402..842227ee5 100644 --- a/AudioQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/AudioQnA/docker_compose/intel/hpu/gaudi/README.md @@ -108,7 +108,7 @@ curl http://${host_ip}:3006/generate \ # llm microservice curl http://${host_ip}:3007/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \ -H 'Content-Type: 
application/json' # speecht5 service diff --git a/AudioQnA/tests/test_gmc_on_gaudi.sh b/AudioQnA/tests/test_gmc_on_gaudi.sh index 898a91524..d90bd3624 100755 --- a/AudioQnA/tests/test_gmc_on_gaudi.sh +++ b/AudioQnA/tests/test_gmc_on_gaudi.sh @@ -34,7 +34,7 @@ function validate_audioqa() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}") - byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) + byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) echo "$byte_str" > $LOG_PATH/curl_audioqa.log if [ -z "$byte_str" ]; then echo "audioqa failed, please check the logs in ${LOG_PATH}!" 
diff --git a/AudioQnA/tests/test_gmc_on_xeon.sh b/AudioQnA/tests/test_gmc_on_xeon.sh index ed6adddd2..15e04e62c 100755 --- a/AudioQnA/tests/test_gmc_on_xeon.sh +++ b/AudioQnA/tests/test_gmc_on_xeon.sh @@ -34,7 +34,7 @@ function validate_audioqa() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='audioqa')].status.accessUrl}") - byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_new_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) + byte_str=$(kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -s -X POST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "parameters":{"max_tokens":64, "do_sample": true, "streaming":false}}' -H 'Content-Type: application/json' | jq .byte_str) echo "$byte_str" > $LOG_PATH/curl_audioqa.log if [ -z "$byte_str" ]; then echo "audioqa failed, please check the logs in ${LOG_PATH}!" diff --git a/ChatQnA/README.md b/ChatQnA/README.md index fa7156ad0..4f56abf05 100644 --- a/ChatQnA/README.md +++ b/ChatQnA/README.md @@ -245,7 +245,9 @@ Refer to the [AI PC Guide](./docker_compose/intel/cpu/aipc/README.md) for instru Refer to the [Intel Technology enabling for Openshift readme](https://github.com/intel/intel-technology-enabling-for-openshift/blob/main/workloads/opea/chatqna/README.md) for instructions to deploy ChatQnA prototype on RHOCP with [Red Hat OpenShift AI (RHOAI)](https://www.redhat.com/en/technologies/cloud-computing/openshift/openshift-ai). 
-## Consume ChatQnA Service +## Consume ChatQnA Service with RAG + +### Check Service Status Before consuming ChatQnA Service, make sure the TGI/vLLM service is ready (which takes up to 2 minutes to start). @@ -260,6 +262,23 @@ Consume ChatQnA service until you get the TGI response like below. 2024-09-03T02:47:53.402023Z INFO text_generation_router::server: router/src/server.rs:2311: Connected ``` +### Upload RAG Files (Optional) + +To chat with retrieved information, you need to upload a file using `Dataprep` service. + +Here is an example of `Nike 2023` pdf. + +```bash +# download pdf file +wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/redis/data/nke-10k-2023.pdf +# upload pdf file with dataprep +curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./nke-10k-2023.pdf" +``` + +### Consume Chat Service + Two ways of consuming ChatQnA Service: 1. Use cURL command on terminal diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml deleted file mode 100644 index c447bcec2..000000000 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ /dev/null @@ -1,641 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: 
http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - 
matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - ports: - - containerPort: 80 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - 
spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 31 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: 
ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - 
value: '512' - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - 
name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - containerPort: 7000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git 
a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml deleted file mode 100644 index 859568ef9..000000000 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ /dev/null @@ -1,641 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: 
chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - ports: - - containerPort: 80 - volumeMounts: - - mountPath: /data - 
name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - 
- args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy 
- maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: 
Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - containerPort: 7000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - 
replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml deleted file mode 100644 index b64263be1..000000000 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ /dev/null @@ -1,641 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: 
http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - 
whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - ports: - - containerPort: 80 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: 
opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 15 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: 
model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - envFrom: - - configMapRef: - name: qna-config - image: 
opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - 
selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - containerPort: 7000 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml deleted file mode 100644 index 6869a78f1..000000000 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml +++ /dev/null @@ -1,730 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - 
ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc - namespace: default -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - 
matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-deploy 
- ports: - - name: service - port: 6000 - targetPort: 6000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 32 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: 
llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - 
serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - args: null - ports: - - containerPort: 8000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-svc - namespace: default -spec: - type: ClusterIP - selector: - app: reranking-deploy - ports: - - name: service - port: 8000 - targetPort: 8000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - 
whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc - namespace: default -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - - ---- diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml deleted file mode 100644 index f38efbeb6..000000000 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: 
null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc - namespace: default -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - 
matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-deploy 
- ports: - - name: service - port: 6000 - targetPort: 6000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 8 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: 
llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc - namespace: default -spec: - type: 
ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - - ---- diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml deleted file mode 100644 index a43553dda..000000000 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - 
INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - 
matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc - namespace: default -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc - namespace: default -spec: - type: ClusterIP 
- selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 16 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: 
model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - 
selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc - namespace: default -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - type: ClusterIP - selector: - app: 
vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - - ---- diff --git a/ChatQnA/benchmark/README.md b/ChatQnA/benchmark/performance/README.md similarity index 95% rename from ChatQnA/benchmark/README.md rename to ChatQnA/benchmark/performance/README.md index 68347a02d..9ca756028 100644 --- a/ChatQnA/benchmark/README.md +++ b/ChatQnA/benchmark/performance/README.md @@ -67,7 +67,7 @@ We have created the [BKC manifest](https://github.com/opea-project/GenAIExamples ```bash # on k8s-master node git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/ChatQnA/benchmark +cd GenAIExamples/ChatQnA/benchmark/performance # replace the image tag from latest to v0.9 since we want to test with v0.9 release IMAGE_TAG=v0.9 @@ -148,7 +148,7 @@ Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/Cha ```bash # on k8s-master node -cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi +cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi kubectl apply -f . ``` @@ -210,7 +210,7 @@ All the test results will come to this folder `/home/sdp/benchmark_output/node_1 ```bash # on k8s-master node -cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi +cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi kubectl delete -f . kubectl label nodes k8s-worker1 node-type- ``` @@ -231,7 +231,7 @@ Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/Cha ```bash # on k8s-master node -cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/two_gaudi +cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi kubectl apply -f . 
``` @@ -280,7 +280,7 @@ Go to [BKC manifest](https://github.com/opea-project/GenAIExamples/tree/main/Cha ```bash # on k8s-master node -cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/four_gaudi +cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi kubectl apply -f . ``` @@ -309,7 +309,7 @@ All the test results will come to this folder `/home/sdp/benchmark_output/node_4 ```bash # on k8s-master node -cd GenAIExamples/ChatQnA/benchmark/tuned/with_rerank/single_gaudi +cd GenAIExamples/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi kubectl delete -f . kubectl label nodes k8s-master k8s-worker1 k8s-worker2 k8s-worker3 node-type- ``` diff --git a/ChatQnA/benchmark/benchmark.yaml b/ChatQnA/benchmark/performance/benchmark.yaml similarity index 98% rename from ChatQnA/benchmark/benchmark.yaml rename to ChatQnA/benchmark/performance/benchmark.yaml index f1eb86a37..851a3e11a 100644 --- a/ChatQnA/benchmark/benchmark.yaml +++ b/ChatQnA/benchmark/performance/benchmark.yaml @@ -41,7 +41,7 @@ test_cases: run_test: false service_name: "llm-svc" # Replace with your service name parameters: - max_new_tokens: 128 + max_tokens: 128 temperature: 0.01 top_k: 10 top_p: 0.95 diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/oob/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/oob/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/oob/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml similarity index 100% rename from 
ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/oob/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/oob/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/oob/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/oob/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/oob/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/oob/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance/oob/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/oob/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml rename to 
ChatQnA/benchmark/performance/oob/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/oob/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance/oob/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/oob/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance/oob/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/tuned/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/tuned/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml rename to 
ChatQnA/benchmark/performance/tuned/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/tuned/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/performance/tuned/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/tuned/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance/tuned/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/tuned/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml rename to 
ChatQnA/benchmark/performance/tuned/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/tuned/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance/tuned/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/performance/tuned/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/performance/tuned/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml deleted file mode 100644 index 1158bada9..000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ /dev/null @@ -1,675 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: 
retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 8000Mi - requests: - cpu: 8 - memory: 8000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: 
IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - cpu: 76 - memory: 20000Mi - requests: - cpu: 76 - memory: 20000Mi - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP 
---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 31 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - 
limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - 
annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - resources: - requests: - cpu: 4 - memory: 
4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - containerPort: 7000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: 
true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml deleted file mode 100644 index e40977213..000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ /dev/null @@ -1,675 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: 
chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 8000Mi - requests: - cpu: 8 - memory: 8000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - 
name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - cpu: 76 - memory: 20000Mi - requests: - cpu: 76 - memory: 20000Mi - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: 
chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- 
-apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: 
IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 
1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - containerPort: 7000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git 
a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml deleted file mode 100644 index 2a54e1ca6..000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ /dev/null @@ -1,675 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 8000Mi - requests: - cpu: 8 - memory: 8000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - 
topologySpreadConstraints: - - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - 
ports: - - containerPort: 80 - resources: - limits: - cpu: 76 - memory: 20000Mi - requests: - cpu: 76 - memory: 20000Mi - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - 
replicas: 15 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - 
configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: 
/mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - containerPort: 7000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - 
topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml deleted file mode 100644 index ad0d8ec55..000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/four_gaudi/tuned_four_gaudi_without_rerank.yaml +++ /dev/null @@ -1,614 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - 
RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc - namespace: default -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: 
embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 76 - memory: 20000Mi - requests: - cpu: 76 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: 
llm-dependency-deploy - namespace: default -spec: - replicas: 32 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: 
kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 4 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc - namespace: default 
-spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - - ---- diff --git a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml deleted file mode 100644 index 0a2bdd525..000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/single_gaudi/tuned_single_gaudi_without_rerank.yaml +++ /dev/null @@ -1,614 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: 
redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: 
chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc - namespace: default -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 76 - memory: 20000Mi - requests: - cpu: 76 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: 
model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 8 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: 
qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - 
serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc - namespace: default -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - 
template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - - ---- diff --git a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml deleted file mode 100644 index 9a4554d9f..000000000 --- a/ChatQnA/benchmark/tuned/without_rerank/two_gaudi/tuned_two_gaudi_without_rerank.yaml +++ /dev/null @@ -1,614 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
-apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna-without-rerank:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - resources: - limits: - cpu: 8 - memory: 4000Mi - requests: - cpu: 8 - memory: 4000Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: 
TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc - namespace: default -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - cpu: 76 - memory: 20000Mi - requests: - cpu: 76 - memory: 20000Mi - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel 
Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - args: null - ports: - - containerPort: 6000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-svc - namespace: default -spec: - type: ClusterIP - selector: - app: embedding-deploy - ports: - - name: service - port: 6000 - targetPort: 6000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 16 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - 
mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - args: null - ports: - - containerPort: 9000 - resources: - limits: - cpu: 4 - requests: - cpu: 4 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-svc - namespace: default -spec: - type: ClusterIP - selector: - app: llm-deploy - ports: - - name: service - port: 9000 - targetPort: 9000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - 
namespace: default -spec: - replicas: 2 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - resources: - limits: - cpu: 8 - memory: 2500Mi - requests: - cpu: 8 - memory: 2500Mi - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc - namespace: default -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 - - ---- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 
---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - - ---- diff --git a/ChatQnA/chatqna_no_wrapper.py b/ChatQnA/chatqna_no_wrapper.py index 2780c7486..c08c6a2f3 100644 --- a/ChatQnA/chatqna_no_wrapper.py +++ b/ChatQnA/chatqna_no_wrapper.py @@ -69,10 +69,12 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k next_inputs = {} next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}] - next_inputs["max_tokens"] = llm_parameters_dict["max_new_tokens"] + next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"] next_inputs["top_p"] = llm_parameters_dict["top_p"] next_inputs["stream"] = inputs["streaming"] - next_inputs["frequency_penalty"] = inputs["repetition_penalty"] + next_inputs["frequency_penalty"] = inputs["frequency_penalty"] + next_inputs["presence_penalty"] = inputs["presence_penalty"] + next_inputs["repetition_penalty"] = inputs["repetition_penalty"] next_inputs["temperature"] = inputs["temperature"] inputs = next_inputs diff --git a/ChatQnA/docker_compose/intel/cpu/aipc/README.md b/ChatQnA/docker_compose/intel/cpu/aipc/README.md index 3c28d1c10..9b13d8185 100644 --- a/ChatQnA/docker_compose/intel/cpu/aipc/README.md +++ b/ChatQnA/docker_compose/intel/cpu/aipc/README.md @@ -229,7 +229,7 @@ OLLAMA_HOST=${host_ip}:11434 ollama run $OLLAMA_MODEL ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep 
Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README.md b/ChatQnA/docker_compose/intel/cpu/xeon/README.md index 7eb75431a..5eca0d284 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README.md @@ -438,18 +438,31 @@ docker compose -f compose_vllm.yaml up -d This service depends on above LLM backend service startup. It will be ready after long time, to wait for them being ready in first startup. ```bash + # TGI service curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` + For parameters in TGI modes, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".) + + ```bash + # vLLM Service + curl http://${your_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \ + -H 'Content-Type: application/json' + ``` + + For parameters in vLLM modes, can refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html) + 8. MegaService ```bash - curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ - "messages": "What is the revenue of Nike in 2023?" 
- }' + curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' ``` 9. Dataprep Microservice(Optional) diff --git a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md index 25ba15c3f..c11ab8e9f 100644 --- a/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md +++ b/ChatQnA/docker_compose/intel/cpu/xeon/README_qdrant.md @@ -304,7 +304,7 @@ docker compose -f compose_qdrant.yaml up -d ```bash curl http://${host_ip}:6047/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md index bc41c782a..ec8e3ad09 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/README.md @@ -442,18 +442,41 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid 7. LLM Microservice ```bash + # TGI service + curl http://${host_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' + ``` + + For parameters in TGI mode, please refer to [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except we rename "max_new_tokens" to "max_tokens".) 
+ + ```bash + # vLLM Service curl http://${host_ip}:9000/v1/chat/completions \ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \ + -H 'Content-Type: application/json' + ``` + + For parameters in vLLM mode, please refer to [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html) + + ```bash + # vLLM-on-Ray Service + curl http://${host_ip}:9000/v1/chat/completions \ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03,"streaming":false}' \ -H 'Content-Type: application/json' ``` + For parameters in vLLM-on-Ray mode, please refer to [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html) + 8. MegaService ```bash curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ - "messages": "What is the revenue of Nike in 2023?" - }' + "messages": "What is the revenue of Nike in 2023?" + }' ``` 9. Dataprep Microservice(Optional) diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md index 2e2d3d023..8ada1e525 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/how_to_validate_service.md @@ -278,7 +278,7 @@ and the log shows model warm up, please wait for a while and try it later. 
``` curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md index cfdda158f..7e3966a7f 100644 --- a/ChatQnA/docker_compose/nvidia/gpu/README.md +++ b/ChatQnA/docker_compose/nvidia/gpu/README.md @@ -280,7 +280,7 @@ docker compose up -d ```bash curl http://${host_ip}:9000/v1/chat/completions \ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/CodeGen/README.md b/CodeGen/README.md index bc93ff473..fcf0f3e33 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -43,6 +43,8 @@ By default, the LLM model is set to a default value as listed below: [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) is a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. 
Change the `LLM_MODEL_ID` below for your needs, such as: [Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat), [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) +If you choose to use `meta-llama/CodeLlama-7b-hf` as LLM model, you will need to visit [here](https://huggingface.co/meta-llama/CodeLlama-7b-hf), click the `Expand to review and access` button to ask for model access. + ### Setup Environment Variable To set up environment variables for deploying ChatQnA services, follow these steps: @@ -132,10 +134,13 @@ Two ways of consuming CodeGen Service: http_proxy="" curl http://${host_ip}:8028/generate \ -X POST \ - -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' \ + -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` -2. (Docker only) If all microservices work well, check the port ${host_ip}:7778, the port may be allocated by other users, you can modify the `compose.yaml`. +2. If you get errors like "aiohttp.client_exceptions.ClientConnectorError: Cannot connect to host xx.xx.xx.xx:8028", check the `tgi service` first. If there is "Cannot access gated repo for url + https://huggingface.co/meta-llama/CodeLlama-7b-hf/resolve/main/config.json." error of `tgi service`, Then you need to ask for model access first. Follow the instruction in the [Required Models](#required-models) section for more information. + +3. 
(Docker only) If all microservices work well, check the port ${host_ip}:7778, the port may be allocated by other users, you can modify the `compose.yaml`. -3. (Docker only) If you get errors like "The container name is in use", change container name in `compose.yaml`. +4. (Docker only) If you get errors like "The container name is in use", change container name in `compose.yaml`. diff --git a/CodeGen/benchmark/accuracy/README.md b/CodeGen/benchmark/accuracy/README.md new file mode 100644 index 000000000..16d21e1a3 --- /dev/null +++ b/CodeGen/benchmark/accuracy/README.md @@ -0,0 +1,100 @@ +# CodeGen accuracy Evaluation + +## Evaluation Framework + +We evaluate accuracy with [bigcode-evaluation-harness](https://github.com/bigcode-project/bigcode-evaluation-harness). It is a framework for the evaluation of code generation models. + +## Evaluation FAQs + +### Launch CodeGen microservice + +Please refer to [CodeGen Examples](https://github.com/opea-project/GenAIExamples/tree/main/CodeGen) and follow the guide to deploy the CodeGen megaservice. + +Use the `curl` command to test the codegen service and ensure that it has started properly. + +```bash +export CODEGEN_ENDPOINT="http://${your_ip}:7778/v1/codegen" +curl $CODEGEN_ENDPOINT \ + -H "Content-Type: application/json" \ + -d '{"messages": "Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception."}' + +``` + +### Generation and Evaluation + +For evaluating the models on coding tasks or specifically coding LLMs, we follow the [bigcode-evaluation-harness](https://github.com/bigcode-project/bigcode-evaluation-harness) and provide the command line usage and function call usage. 
[HumanEval](https://huggingface.co/datasets/openai_humaneval), [HumanEval+](https://huggingface.co/datasets/evalplus/humanevalplus), [InstructHumanEval](https://huggingface.co/datasets/codeparrot/instructhumaneval), [APPS](https://huggingface.co/datasets/codeparrot/apps), [MBPP](https://huggingface.co/datasets/mbpp), [MBPP+](https://huggingface.co/datasets/evalplus/mbppplus), and [DS-1000](https://github.com/HKUNLP/DS-1000/) for both completion (left-to-right) and insertion (FIM) mode are available. + +#### command line usage + +```shell +git clone https://github.com/opea-project/GenAIEval +cd GenAIEval +pip install -r requirements.txt +pip install -e . + +cd evals/evaluation/bigcode_evaluation_harness/examples +python main.py --model Qwen/CodeQwen1.5-7B-Chat \ + --tasks humaneval \ + --codegen_url $CODEGEN_ENDPOINT \ + --max_length_generation 2048 \ + --batch_size 1 \ + --save_generations \ + --save_references \ + --allow_code_execution +``` + +**_Note:_** Currently, our framework is designed to execute tasks in full. To ensure the accuracy of results, we advise against using the 'limit' or 'limit_start' parameters to restrict the number of test samples. 
+ +### accuracy Result + +Here is the tested result for your reference + +```json +{ + "humaneval": { + "pass@1": 0.7195121951219512 + }, + "config": { + "prefix": "", + "do_sample": true, + "temperature": 0.2, + "top_k": 0, + "top_p": 0.95, + "n_samples": 1, + "eos": "<|endoftext|>", + "seed": 0, + "model": "Qwen/CodeQwen1.5-7B-Chat", + "modeltype": "causal", + "peft_model": null, + "revision": null, + "use_auth_token": false, + "trust_remote_code": false, + "tasks": "humaneval", + "instruction_tokens": null, + "batch_size": 1, + "max_length_generation": 2048, + "precision": "fp32", + "load_in_8bit": false, + "load_in_4bit": false, + "left_padding": false, + "limit": null, + "limit_start": 0, + "save_every_k_tasks": -1, + "postprocess": true, + "allow_code_execution": true, + "generation_only": false, + "load_generations_path": null, + "load_data_path": null, + "metric_output_path": "evaluation_results.json", + "save_generations": true, + "load_generations_intermediate_paths": null, + "save_generations_path": "generations.json", + "save_references": true, + "save_references_path": "references.json", + "prompt": "prompt", + "max_memory_per_gpu": null, + "check_references": false, + "codegen_url": "http://192.168.123.104:31234/v1/codegen" + } +} +``` diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index d7dc3376e..8bdde1f75 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -138,7 +138,7 @@ docker compose up -d ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. 
If the request is invalid, raise an exception.","max_new_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 74afd54ae..2a5040ea0 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -119,7 +119,7 @@ docker compose up -d ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","max_new_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. 
If the request is invalid, raise an exception.","max_tokens":256,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/CodeGen/tests/test_gmc_on_gaudi.sh b/CodeGen/tests/test_gmc_on_gaudi.sh index ad16e2108..805237208 100755 --- a/CodeGen/tests/test_gmc_on_gaudi.sh +++ b/CodeGen/tests/test_gmc_on_gaudi.sh @@ -34,7 +34,7 @@ function validate_codegen() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "chatqna failed, please check the logs in ${LOG_PATH}!" 
diff --git a/CodeGen/tests/test_gmc_on_xeon.sh b/CodeGen/tests/test_gmc_on_xeon.sh index 92f620365..5f3ff0eae 100755 --- a/CodeGen/tests/test_gmc_on_xeon.sh +++ b/CodeGen/tests/test_gmc_on_xeon.sh @@ -34,7 +34,7 @@ function validate_codegen() { export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) echo "$CLIENT_POD" accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_new_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log + kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"inputs":"Implement a high-level API for a TODO list application. The API takes as input an operation request and updates the TODO list in place. If the request is invalid, raise an exception.","parameters":{"max_tokens":256, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_codegen.log exit_code=$? if [ $exit_code -ne 0 ]; then echo "chatqna failed, please check the logs in ${LOG_PATH}!" diff --git a/CodeTrans/README.md b/CodeTrans/README.md index 0a00ca902..a1b95b154 100644 --- a/CodeTrans/README.md +++ b/CodeTrans/README.md @@ -127,7 +127,7 @@ By default, the UI runs on port 5173 internally. http_proxy="" curl http://${host_ip}:8008/generate \ -X POST \ - -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. 
### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -d '{"inputs":" ### System: Please translate the following Golang codes into Python codes. ### Original codes: '\'''\'''\''Golang \npackage main\n\nimport \"fmt\"\nfunc main() {\n fmt.Println(\"Hello, World!\");\n '\'''\'''\'' ### Translated codes:","parameters":{"max_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` diff --git a/DocSum/README.md b/DocSum/README.md index 23d662987..ca1ebfeba 100644 --- a/DocSum/README.md +++ b/DocSum/README.md @@ -147,9 +147,9 @@ Two ways of consuming Document Summarization Service: ```bash http_proxy="" - curl http://${your_ip}:8008/generate \ + curl http://${host_ip}:8008/generate \ -X POST \ - -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' ``` diff --git a/DocSum/docker_compose/intel/cpu/xeon/README.md b/DocSum/docker_compose/intel/cpu/xeon/README.md index 4085365be..a067e9e27 100644 --- a/DocSum/docker_compose/intel/cpu/xeon/README.md +++ b/DocSum/docker_compose/intel/cpu/xeon/README.md @@ -105,7 +105,7 @@ docker compose up -d 1. TGI Service ```bash - curl http://${your_ip}:8008/generate \ + curl http://${host_ip}:8008/generate \ -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ -H 'Content-Type: application/json' @@ -114,7 +114,7 @@ docker compose up -d 2. LLM Microservice ```bash - curl http://${your_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/chat/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. 
TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/DocSum/docker_compose/intel/hpu/gaudi/README.md b/DocSum/docker_compose/intel/hpu/gaudi/README.md index 8ef3b2916..abb4a9bed 100644 --- a/DocSum/docker_compose/intel/hpu/gaudi/README.md +++ b/DocSum/docker_compose/intel/hpu/gaudi/README.md @@ -96,7 +96,7 @@ docker compose up -d 1. TGI Service ```bash - curl http://${your_ip}:8008/generate \ + curl http://${host_ip}:8008/generate \ -X POST \ -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":64, "do_sample": true}}' \ -H 'Content-Type: application/json' @@ -105,7 +105,7 @@ docker compose up -d 2. LLM Microservice ```bash - curl http://${your_ip}:9000/v1/chat/docsum \ + curl http://${host_ip}:9000/v1/chat/docsum \ -X POST \ -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \ -H 'Content-Type: application/json' diff --git a/FaqGen/benchmark/accuracy/README.md b/FaqGen/benchmark/accuracy/README.md new file mode 100644 index 000000000..1c180c395 --- /dev/null +++ b/FaqGen/benchmark/accuracy/README.md @@ -0,0 +1,78 @@ +# FaqGen Evaluation + +## Dataset + +We evaluate performance on the QA dataset [Squad_v2](https://huggingface.co/datasets/rajpurkar/squad_v2). We generate FAQs on the "context" column of the validation dataset, which contains 1204 unique records. + +First download the dataset and put it in "./data". + +Extract the unique "context" values, which will be saved to 'data/sqv2_context.json': + +``` +python get_context.py +``` + +## Generate FAQs + +### Launch FaQGen microservice + +Please refer to [FaQGen microservice](https://github.com/opea-project/GenAIComps/tree/main/comps/llms/faq-generation/tgi) to set up a microservice endpoint. 
+ +``` +export FAQ_ENDPOINT="http://${your_ip}:9000/v1/faqgen" +``` + +### Generate FAQs with microservice + +Use the microservice endpoint to generate FAQs for the dataset. + +``` +python generate_FAQ.py +``` + +Post-process the output to get the right data, which will be saved to 'data/sqv2_faq.json'. + +``` +python post_process_FAQ.py +``` + +## Evaluate with Ragas + +### Launch TGI service + +We use "mistralai/Mixtral-8x7B-Instruct-v0.1" as the LLM referee to evaluate the model. First we need to launch an LLM endpoint on Gaudi. + +``` +export HUGGING_FACE_HUB_TOKEN="your_huggingface_token" +bash launch_tgi.sh +``` + +Get the endpoint: + +``` +export LLM_ENDPOINT="http://${ip_address}:8082" +``` + +Verify the service: + +```bash +curl http://${ip_address}:8082/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":128}}' \ + -H 'Content-Type: application/json' +``` + +### Evaluate + +Evaluate the performance with the LLM: + +``` +python evaluate.py +``` + +### Performance Result + +Here is the tested result for your reference +| answer_relevancy | faithfulness | context_utilization | reference_free_rubrics_score | +| ---- | ---- |---- |---- | +| 0.7191 | 0.9681 | 0.8964 | 4.4125| diff --git a/FaqGen/benchmark/accuracy/evaluate.py b/FaqGen/benchmark/accuracy/evaluate.py new file mode 100644 index 000000000..30998da4d --- /dev/null +++ b/FaqGen/benchmark/accuracy/evaluate.py @@ -0,0 +1,44 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os + +from evals.metrics.ragas import RagasMetric +from langchain_community.embeddings import HuggingFaceBgeEmbeddings + +llm_endpoint = os.getenv("LLM_ENDPOINT", "http://0.0.0.0:8082") + +f = open("data/sqv2_context.json", "r") +sqv2_context = json.load(f) + +f = open("data/sqv2_faq.json", "r") +sqv2_faq = json.load(f) + +templ = """Create a concise FAQs (frequently asked questions and answers) for following text: + TEXT: {text} + Do not use 
any prefix or suffix to the FAQ. + """ + +number = 1204 +question = [] +answer = [] +ground_truth = ["None"] * number +contexts = [] +for i in range(number): + inputs = sqv2_context[str(i)] + inputs_faq = templ.format_map({"text": inputs}) + actual_output = sqv2_faq[str(i)] + + question.append(inputs_faq) + answer.append(actual_output) + contexts.append([inputs_faq]) + +embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5") +metrics_faq = ["answer_relevancy", "faithfulness", "context_utilization", "reference_free_rubrics_score"] +metric = RagasMetric(threshold=0.5, model=llm_endpoint, embeddings=embeddings, metrics=metrics_faq) + +test_case = {"question": question, "answer": answer, "ground_truth": ground_truth, "contexts": contexts} + +metric.measure(test_case) +print(metric.score) diff --git a/FaqGen/benchmark/accuracy/generate_FAQ.py b/FaqGen/benchmark/accuracy/generate_FAQ.py new file mode 100644 index 000000000..2ed70b9ef --- /dev/null +++ b/FaqGen/benchmark/accuracy/generate_FAQ.py @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os +import time + +import requests + +llm_endpoint = os.getenv("FAQ_ENDPOINT", "http://0.0.0.0:9000/v1/faqgen") + +f = open("data/sqv2_context.json", "r") +sqv2_context = json.load(f) + +start_time = time.time() +headers = {"Content-Type": "application/json"} +for i in range(1204): + start_time_tmp = time.time() + print(i) + inputs = sqv2_context[str(i)] + data = {"query": inputs, "max_new_tokens": 128} + response = requests.post(llm_endpoint, json=data, headers=headers) + f = open(f"data/result/sqv2_faq_{i}", "w") + f.write(inputs) + f.write(str(response.content, encoding="utf-8")) + f.close() + print(f"Cost {time.time()-start_time_tmp} seconds") +print(f"\n Finished! 
\n Totally Cost {time.time()-start_time} seconds\n") diff --git a/FaqGen/benchmark/accuracy/get_context.py b/FaqGen/benchmark/accuracy/get_context.py new file mode 100644 index 000000000..8cb73a054 --- /dev/null +++ b/FaqGen/benchmark/accuracy/get_context.py @@ -0,0 +1,17 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json +import os + +import pandas as pd + +data_path = "./data" +data = pd.read_parquet(os.path.join(data_path, "squad_v2/squad_v2/validation-00000-of-00001.parquet")) +sq_context = list(data["context"].unique()) +sq_context_d = dict() +for i in range(len(sq_context)): + sq_context_d[i] = sq_context[i] + +with open(os.path.join(data_path, "sqv2_context.json"), "w") as outfile: + json.dump(sq_context_d, outfile) diff --git a/FaqGen/benchmark/accuracy/launch_tgi.sh b/FaqGen/benchmark/accuracy/launch_tgi.sh new file mode 100644 index 000000000..b3e04bbbf --- /dev/null +++ b/FaqGen/benchmark/accuracy/launch_tgi.sh @@ -0,0 +1,28 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +max_input_tokens=3072 +max_total_tokens=4096 +port_number=8082 +model_name="mistralai/Mixtral-8x7B-Instruct-v0.1" +volume="./data" +docker run -it --rm \ + --name="tgi_Mixtral" \ + -p $port_number:80 \ + -v $volume:/data \ + --runtime=habana \ + --restart always \ + -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \ + -e HABANA_VISIBLE_DEVICES=all \ + -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ + -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true \ + --cap-add=sys_nice \ + --ipc=host \ + -e HTTPS_PROXY=$https_proxy \ + -e HTTP_PROXY=$https_proxy \ + ghcr.io/huggingface/tgi-gaudi:2.0.1 \ + --model-id $model_name \ + --max-input-tokens $max_input_tokens \ + --max-total-tokens $max_total_tokens \ + --sharded true \ + --num-shard 2 diff --git a/FaqGen/benchmark/accuracy/post_process_FAQ.py b/FaqGen/benchmark/accuracy/post_process_FAQ.py new file mode 100644 index 000000000..83e6b8350 --- /dev/null +++ 
b/FaqGen/benchmark/accuracy/post_process_FAQ.py @@ -0,0 +1,27 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import json + +faq_dict = {} +fails = [] +for i in range(1204): + data = open(f"data/result/sqv2_faq_{i}", "r").readlines() + result = data[-6][6:] + # print(result) + if "LLMChain/final_output" not in result: + print(f"error1: fail for {i}") + fails.append(i) + continue + try: + result2 = json.loads(result) + result3 = result2["ops"][0]["value"]["text"] + faq_dict[str(i)] = result3 + except: + print(f"error2: fail for {i}") + fails.append(i) + continue +with open("data/sqv2_faq.json", "w") as outfile: + json.dump(faq_dict, outfile) +print("Failure index:") +print(fails) diff --git a/MultimodalQnA/ui/gradio/multimodalqna_ui_gradio.py b/MultimodalQnA/ui/gradio/multimodalqna_ui_gradio.py index 65696cc54..3eba01a71 100644 --- a/MultimodalQnA/ui/gradio/multimodalqna_ui_gradio.py +++ b/MultimodalQnA/ui/gradio/multimodalqna_ui_gradio.py @@ -25,6 +25,7 @@ display:block; } """ +tmp_upload_folder = "/tmp/gradio/" # create a FastAPI app app = FastAPI() @@ -122,11 +123,14 @@ def http_bot(state, request: gr.Request): video_file = metadata["source_video"] state.video_file = os.path.join(static_dir, metadata["source_video"]) state.time_of_frame_ms = metadata["time_of_frame_ms"] - splited_video_path = split_video( - state.video_file, state.time_of_frame_ms, tmp_dir, f"{state.time_of_frame_ms}__{video_file}" - ) + try: + splited_video_path = split_video( + state.video_file, state.time_of_frame_ms, tmp_dir, f"{state.time_of_frame_ms}__{video_file}" + ) + except: + print(f"video {state.video_file} does not exist in UI host!") + splited_video_path = None state.split_video = splited_video_path - print(splited_video_path) else: raise requests.exceptions.RequestException except requests.exceptions.RequestException as e: @@ -143,9 +147,19 @@ def http_bot(state, request: gr.Request): def ingest_video_gen_transcript(filepath, request: 
gr.Request): yield (gr.Textbox(visible=True, value="Please wait for ingesting your uploaded video into database...")) - basename = os.path.basename(filepath) + verified_filepath = os.path.normpath(filepath) + if not verified_filepath.startswith(tmp_upload_folder): + print("Found malicious video file name!") + yield ( + gr.Textbox( + visible=True, + value="Your uploaded video's file name has special characters that are not allowed. Please consider update the video file name!", + ) + ) + return + basename = os.path.basename(verified_filepath) dest = os.path.join(static_dir, basename) - shutil.copy(filepath, dest) + shutil.copy(verified_filepath, dest) print("Done copy uploaded file to static folder!") headers = { # 'Content-Type': 'multipart/form-data' @@ -185,9 +199,19 @@ def ingest_video_gen_transcript(filepath, request: gr.Request): def ingest_video_gen_caption(filepath, request: gr.Request): yield (gr.Textbox(visible=True, value="Please wait for ingesting your uploaded video into database...")) - basename = os.path.basename(filepath) + verified_filepath = os.path.normpath(filepath) + if not verified_filepath.startswith(tmp_upload_folder): + print("Found malicious video file name!") + yield ( + gr.Textbox( + visible=True, + value="Your uploaded video's file name has special characters that are not allowed. 
Please consider update the video file name!", + ) + ) + return + basename = os.path.basename(verified_filepath) dest = os.path.join(static_dir, basename) - shutil.copy(filepath, dest) + shutil.copy(verified_filepath, dest) print("Done copy uploaded file to static folder!") headers = { # 'Content-Type': 'multipart/form-data' diff --git a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md index dd1f59f27..312f191ff 100644 --- a/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md +++ b/ProductivitySuite/docker_compose/intel/cpu/xeon/README.md @@ -271,7 +271,7 @@ Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail ```bash curl http://${host_ip}:9000/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/SearchQnA/docker_compose/intel/cpu/xeon/README.md b/SearchQnA/docker_compose/intel/cpu/xeon/README.md index f31975ac6..5dbd77464 100644 --- a/SearchQnA/docker_compose/intel/cpu/xeon/README.md +++ b/SearchQnA/docker_compose/intel/cpu/xeon/README.md @@ -140,7 +140,7 @@ curl http://${host_ip}:3006/generate \ # llm microservice curl http://${host_ip}:3007/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md 
b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md index b34398c35..6021c7938 100644 --- a/SearchQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/SearchQnA/docker_compose/intel/hpu/gaudi/README.md @@ -150,7 +150,7 @@ curl http://${host_ip}:3006/generate \ # llm microservice curl http://${host_ip}:3007/v1/chat/completions\ -X POST \ - -d '{"query":"What is Deep Learning?","max_new_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ -H 'Content-Type: application/json' ``` diff --git a/Translation/docker_compose/intel/cpu/xeon/README.md b/Translation/docker_compose/intel/cpu/xeon/README.md index 306f8e35d..651b20950 100644 --- a/Translation/docker_compose/intel/cpu/xeon/README.md +++ b/Translation/docker_compose/intel/cpu/xeon/README.md @@ -10,9 +10,24 @@ For detailed information about these instance types, you can refer to this [link After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. -## 🚀 Build Docker Images +## 🚀 Prepare Docker Images -First of all, you need to build Docker Images locally and install the python package of it. +For Docker Images, you have two options to prepare them. + +1. Pull the docker images from docker hub. + + - More stable to use. + - Will be automatically downloaded when using docker compose command. + +2. Build the docker images from source. + + - Contains the latest features. + + - Needs to be built manually. + +If you choose to pull docker images from docker hub, skip this section and go directly to the [Start Microservices](#start-microservices) part.
+ +Follow the instructions below to build the docker images from source. ### 1. Build LLM Image @@ -45,7 +60,7 @@ docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy ```bash cd GenAIComps -docker build -t opea/translation-nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . +docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . ``` Then run the command `docker images`, you will have the following Docker Images: @@ -53,7 +68,7 @@ Then run the command `docker images`, you will have the following Docker Images: 1. `opea/llm-tgi:latest` 2. `opea/translation:latest` 3. `opea/translation-ui:latest` -4. `opea/translation-nginx:latest` +4. `opea/nginx:latest` ## 🚀 Start Microservices @@ -101,6 +116,15 @@ Change the `LLM_MODEL_ID` below for your needs. docker compose up -d ``` +> Note: The docker images will be automatically downloaded from `docker hub`: + +```bash +docker pull opea/llm-tgi:latest +docker pull opea/translation:latest +docker pull opea/translation-ui:latest +docker pull opea/nginx:latest +``` + ### Validate Microservices 1. 
TGI Service diff --git a/Translation/docker_compose/intel/cpu/xeon/compose.yaml b/Translation/docker_compose/intel/cpu/xeon/compose.yaml index e8eafca4f..108a5086d 100644 --- a/Translation/docker_compose/intel/cpu/xeon/compose.yaml +++ b/Translation/docker_compose/intel/cpu/xeon/compose.yaml @@ -66,7 +66,7 @@ services: ipc: host restart: always translation-xeon-nginx-server: - image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest} + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} container_name: translation-xeon-nginx-server depends_on: - translation-xeon-backend-server diff --git a/Translation/docker_compose/intel/hpu/gaudi/README.md b/Translation/docker_compose/intel/hpu/gaudi/README.md index 9f234496c..a9e807a12 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/README.md +++ b/Translation/docker_compose/intel/hpu/gaudi/README.md @@ -2,9 +2,24 @@ This document outlines the deployment process for a Translation application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as We will publish the Docker images to Docker Hub, it will simplify the deployment process for this service. -## 🚀 Build Docker Images +## 🚀 Prepare Docker Images -First of all, you need to build Docker Images locally. This step can be ignored after the Docker images published to Docker hub. +For Docker Images, you have two options to prepare them. + +1. Pull the docker images from docker hub. + + - More stable to use. + - Will be automatically downloaded when using docker compose command. + +2. Build the docker images from source. + + - Contains the latest features. + + - Needs to be built manually. + +If you choose to pull docker images from docker hub, skip directly to the [Start Microservices](#start-microservices) part.
+ +Follow the instructions below to build the docker images from source. ### 1. Build LLM Image @@ -37,7 +52,7 @@ docker build -t opea/translation-ui:latest --build-arg https_proxy=$https_proxy ```bash cd GenAIComps -docker build -t opea/translation-nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . +docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile . ``` Then run the command `docker images`, you will have the following four Docker Images: @@ -45,7 +60,7 @@ Then run the command `docker images`, you will have the following four Docker Im 1. `opea/llm-tgi:latest` 2. `opea/translation:latest` 3. `opea/translation-ui:latest` -4. `opea/translation-nginx:latest` +4. `opea/nginx:latest` ## 🚀 Start Microservices @@ -93,6 +108,15 @@ Change the `LLM_MODEL_ID` below for your needs. docker compose up -d ``` +> Note: The docker images will be automatically downloaded from `docker hub`: + +```bash +docker pull opea/llm-tgi:latest +docker pull opea/translation:latest +docker pull opea/translation-ui:latest +docker pull opea/nginx:latest +``` + ### Validate Microservices 1. 
TGI Service diff --git a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml index 6eefd6492..3d8b0ab47 100644 --- a/Translation/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/Translation/docker_compose/intel/hpu/gaudi/compose.yaml @@ -67,7 +67,7 @@ services: ipc: host restart: always translation-gaudi-nginx-server: - image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest} + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} container_name: translation-gaudi-nginx-server depends_on: - translation-gaudi-backend-server diff --git a/Translation/docker_image_build/build.yaml b/Translation/docker_image_build/build.yaml index a1562060b..2230a8657 100644 --- a/Translation/docker_image_build/build.yaml +++ b/Translation/docker_image_build/build.yaml @@ -28,4 +28,4 @@ services: context: GenAIComps dockerfile: comps/nginx/Dockerfile extends: translation - image: ${REGISTRY:-opea}/translation-nginx:${TAG:-latest} + image: ${REGISTRY:-opea}/nginx:${TAG:-latest} diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md index 3a6058e0c..8f0d5b6b3 100644 --- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md +++ b/VisualQnA/docker_compose/intel/cpu/xeon/README.md @@ -138,28 +138,28 @@ Follow the instructions to validate MicroServices. 2. MegaService - ```bash - curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - ``` +```bash +curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" 
+ }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 300 + }' +``` ## 🚀 Launch the UI diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md index 2a8f3a276..84783353a 100644 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md @@ -95,28 +95,28 @@ Follow the instructions to validate MicroServices. 2. MegaService - ```bash - curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - ``` +```bash +curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "What'\''s in this image?" + }, + { + "type": "image_url", + "image_url": { + "url": "https://www.ilankelman.org/stopsigns/australia.jpg" + } + } + ] + } + ], + "max_tokens": 300 + }' +``` ## 🚀 Launch the UI