File tree Expand file tree Collapse file tree 1 file changed +8
-4
lines changed
onnxruntime/core/providers/openvino Expand file tree Collapse file tree 1 file changed +8
-4
lines changed Original file line number Diff line number Diff line change @@ -360,10 +360,14 @@ void OVInferRequest::Infer() {
360360
361361StatefulOVInferRequest::StatefulOVInferRequest (ov::InferRequest infer_request, std::string device)
362362 : OVInferRequest(std::move(infer_request)), target_device(device) {
363- // bool gpu_or_npu = ((device.find("NPU") != std::string::npos) || (device.find("GPU") != std::string::npos));
364- // if (gpu_or_npu) {
365- // prefill_use_full_chat_history = true;
366- // }
363+ bool gpu_or_npu = ((device.find (" NPU" ) != std::string::npos) || (device.find (" GPU" ) != std::string::npos));
364+
365+ // check if there is input_ids tensors and if the tensor type is int64,
366+ // because logic prefill_use_full_chat_history is only for specific inputs and data type
367+ auto input_ids_opt = FindTensor (" input_ids" );
368+ if (gpu_or_npu && input_ids_opt.has_value () && input_ids_opt->get_element_type () != ov::element::i64 ) {
369+ prefill_use_full_chat_history = true ;
370+ }
367371}
368372
369373void StatefulOVInferRequest::FillTensor (const std::string& tensor_name, const ov::element::Type& type,
You can’t perform that action at this time.
0 commit comments