From db86fbd57a82c05726e640c900248caa8379fa50 Mon Sep 17 00:00:00 2001
From: winskuo-quic <quic_winskuo@quicinc.com>
Date: Thu, 2 Jan 2025 10:49:45 +0800
Subject: [PATCH] Reorder HybridMemory initializers; use last methods_meta for prefill logits

---
 examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp b/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp
index 3a96b3156f..ccf386309c 100644
--- a/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp
+++ b/examples/qualcomm/oss_scripts/llama3_2/runner/io_memory.cpp
@@ -64,8 +64,8 @@ HybridMemory::HybridMemory(
     const std::string& kv_forward_name)
     : Memory(modules),
       shard_layers_({num_layers}),
-      prefill_cache_len_(prefill_cache_len),
       kv_cache_len_(kv_cache_len),
+      prefill_cache_len_(prefill_cache_len),
       vocab_size_(vocab_size),
       num_layers_(num_layers),
       head_dim_(head_dim),
@@ -332,7 +332,8 @@ void HybridMemory::prepare_prefill_io(
   input_tensors_[prefill_forward_name_][0].push_back(prefill_attn_mask_.get());
   // [O]: logits
   int logit_index = 0;
-  Result<TensorInfo> logits = methods_meta[0]->output_tensor_meta(0);
+  Result<TensorInfo> logits =
+      methods_meta[modules_.size() - 1]->output_tensor_meta(logit_index);
   prefill_logits_ = std::make_unique<TensorImpl>(
       logits->scalar_type(),
       logits->sizes().size(),