From db86fbd57a82c05726e640c900248caa8379fa50 Mon Sep 17 00:00:00 2001
From: winskuo-quic
Date: Thu, 2 Jan 2025 10:49:45 +0800
Subject: [PATCH] code review

---
 examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp b/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp
index 3a96b3156f..ccf386309c 100644
--- a/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp
+++ b/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp
@@ -64,8 +64,8 @@ HybridMemory::HybridMemory(
     const std::string& kv_forward_name)
     : Memory(modules),
       shard_layers_({num_layers}),
-      prefill_cache_len_(prefill_cache_len),
       kv_cache_len_(kv_cache_len),
+      prefill_cache_len_(prefill_cache_len),
       vocab_size_(vocab_size),
       num_layers_(num_layers),
       head_dim_(head_dim),
@@ -332,7 +332,8 @@ void HybridMemory::prepare_prefill_io(
   input_tensors_[prefill_forward_name_][0].push_back(prefill_attn_mask_.get());
   // [O]: logits
   int logit_index = 0;
-  Result<TensorInfo> logits = methods_meta[0]->output_tensor_meta(0);
+  Result<TensorInfo> logits =
+      methods_meta[modules_.size() - 1]->output_tensor_meta(logit_index);
   prefill_logits_ = std::make_unique<TensorImpl>(
       logits->scalar_type(),
       logits->sizes().size(),