diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index 85866573f..c447bcec2 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -275,10 +275,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index eb63aada5..859568ef9 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -275,10 +275,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index 1b9bf7ebf..b64263be1 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -275,10 +275,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none diff --git a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml index e010496b8..6869a78f1 100644 --- a/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/four_gaudi/oob_four_gaudi_without_rerank.yaml @@ -336,10 +336,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml index b75e8f291..f38efbeb6 100644 --- a/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/single_gaudi/oob_single_gaudi_without_rerank.yaml @@ -336,10 +336,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' volumeMounts: - mountPath: /data name: model-volume diff --git a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml index fa62ef84d..a43553dda 100644 --- a/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob/without_rerank/two_gaudi/oob_two_gaudi_without_rerank.yaml @@ -336,10 +336,6 @@ spec: - '2048' - --max-total-tokens - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' volumeMounts: - mountPath: /data name: model-volume