Skip to content

Commit

Permalink
update manifests for v0.9 (#632)
Browse files (browse the repository at this point in the history)
* update model HF TOKEN variables & reranking name for v0.9
  • Loading branch information
Zhenzhong1 committed Aug 20, 2024
1 parent 01c1b75 commit ba78b4c
Show file tree
Hide file tree
Showing 11 changed files with 28 additions and 28 deletions.
4 changes: 2 additions & 2 deletions ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 6
replicas: 4
selector:
matchLabels:
app: embedding-dependency-deploy
Expand Down Expand Up @@ -48,7 +48,7 @@ spec:
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
8 changes: 4 additions & 4 deletions ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ spec:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
Expand All @@ -62,12 +62,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
6 changes: 3 additions & 3 deletions ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: tei_gaudi:rerank
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- --model-id
Expand All @@ -57,14 +57,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
2 changes: 1 addition & 1 deletion ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ data:
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
Expand Down
2 changes: 1 addition & 1 deletion ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ spec:
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
8 changes: 4 additions & 4 deletions ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ spec:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
Expand All @@ -62,12 +62,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
6 changes: 3 additions & 3 deletions ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: tei_gaudi:rerank
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- --model-id
Expand All @@ -57,14 +57,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
2 changes: 1 addition & 1 deletion ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ data:
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
INDEX_NAME: rag-redis
HUGGINGFACEHUB_API_TOKEN: {HF_TOKEN}
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN}
EMBEDDING_SERVICE_HOST_IP: embedding-svc
RETRIEVER_SERVICE_HOST_IP: retriever-svc
RERANK_SERVICE_HOST_IP: reranking-svc
Expand Down
4 changes: 2 additions & 2 deletions ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
name: embedding-dependency-deploy
namespace: default
spec:
replicas: 3
replicas: 2
selector:
matchLabels:
app: embedding-dependency-deploy
Expand Down Expand Up @@ -48,7 +48,7 @@ spec:
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
8 changes: 4 additions & 4 deletions ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ spec:
- --model-id
- $(LLM_MODEL_ID)
- --max-input-length
- '1024'
- --max-total-tokens
- '2048'
- --max-total-tokens
- '4096'
- --max-batch-total-tokens
- '65536'
- --max-batch-prefill-tokens
Expand All @@ -62,12 +62,12 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down
6 changes: 3 additions & 3 deletions ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ spec:
- envFrom:
- configMapRef:
name: qna-config
image: tei_gaudi:rerank
image: opea/tei-gaudi:latest
name: reranking-dependency-deploy
args:
- --model-id
Expand All @@ -57,14 +57,14 @@ spec:
- name: HABANA_VISIBLE_DEVICES
value: all
- name: HF_TOKEN
value: $(HF_TOKEN)
value: ${HF_TOKEN}
- name: MAX_WARMUP_SEQUENCE_LENGTH
value: '512'
serviceAccountName: default
volumes:
- name: model-volume
hostPath:
path: /home/sdp/cesg
path: /mnt/models
type: Directory
- name: shm
emptyDir:
Expand Down

0 comments on commit ba78b4c

Please sign in to comment.