diff --git a/src/fastertransformer/models/gptneox/GptNeoX.cc b/src/fastertransformer/models/gptneox/GptNeoX.cc index 2ce2dae7b..e638a91e5 100644 --- a/src/fastertransformer/models/gptneox/GptNeoX.cc +++ b/src/fastertransformer/models/gptneox/GptNeoX.cc @@ -123,7 +123,7 @@ void GptNeoX::allocateBuffer( prompt_learning_weight_batch_ = (const T**)(allocator_->reMalloc(prompt_learning_weight_batch_, sizeof(T*) * batchxbeam, false)); tiled_prompt_lengths_buf_ = - (int*)(allocator_->reMalloc(tiled_prompt_lengths_buf_, sizeof(int) * batchxbeam, false)); + (int*)(allocator_->reMalloc(tiled_prompt_lengths_buf_, sizeof(int) * batchxbeam, true)); tiled_input_ids_buf_ = (int*)(allocator_->reMalloc(tiled_input_ids_buf_, sizeof(int) * batchxbeam * max_input_len, true));