|
15 | 15 | # limitations under the License. |
16 | 16 | # |
17 | 17 |
|
| 18 | +import time |
18 | 19 | from typing import Dict, Optional |
19 | 20 |
|
20 | 21 | from vllm.outputs import CompletionOutput, RequestOutput |
@@ -84,7 +85,7 @@ def from_seq_group( |
84 | 85 | output_token_ids = seq.get_output_token_ids_to_return(delta) |
85 | 86 | num_output_tokens = 1 if isinstance(output_token_ids, |
86 | 87 | int) else len(output_token_ids) |
87 | | - num_cached_tokens = seq.data.get_num_cached_tokens() # noqa |
| 88 | + num_cached_tokens = seq.data.get_num_cached_tokens() |
88 | 89 |
|
89 | 90 | output_logprobs = seq.output_logprobs if include_logprobs else None |
90 | 91 |
|
@@ -139,7 +140,44 @@ def from_seq_group( |
139 | 140 |
|
140 | 141 | outputs.append(output) |
141 | 142 |
|
142 | | - return None |
| 143 | + # Every sequence in the sequence group should have the same prompt. |
| 144 | + if include_prompt: |
| 145 | + prompt = seq_group.prompt |
| 146 | + prompt_token_ids = seq_group.prompt_token_ids |
| 147 | + encoder_prompt = seq_group.encoder_prompt |
| 148 | + encoder_prompt_token_ids = seq_group.encoder_prompt_token_ids |
| 149 | + prompt_logprobs = seq_group.prompt_logprobs |
| 150 | + else: |
| 151 | + prompt = None |
| 152 | + prompt_token_ids = None |
| 153 | + encoder_prompt = None |
| 154 | + encoder_prompt_token_ids = None |
| 155 | + prompt_logprobs = None |
| 156 | + finished_time = time.time() if finished else None |
| 157 | + seq_group.set_finished_time(finished_time) |
| 158 | + |
| 159 | + init_kwargs = { |
| 160 | + "request_id": seq_group.request_id, |
| 161 | + "prompt": prompt, |
| 162 | + "prompt_token_ids": prompt_token_ids, |
| 163 | + "prompt_logprobs": prompt_logprobs, |
| 164 | + "outputs": outputs, |
| 165 | + "finished": finished, |
| 166 | + "metrics": seq_group.metrics, |
| 167 | + "lora_request": seq_group.lora_request, |
| 168 | + "encoder_prompt": encoder_prompt, |
| 169 | + "encoder_prompt_token_ids": encoder_prompt_token_ids, |
| 170 | + "num_cached_tokens": num_cached_tokens, |
| 171 | + "multi_modal_placeholders": seq_group.multi_modal_placeholders |
| 172 | + } |
| 173 | + |
| 174 | + if use_cache: |
| 175 | + request_output = seq_group.cached_request_output |
| 176 | + request_output.__init__(**init_kwargs) # type: ignore |
| 177 | + else: |
| 178 | + request_output = cls(**init_kwargs) # type: ignore |
| 179 | + |
| 180 | + return request_output |
143 | 181 |
|
144 | 182 |
|
145 | 183 | # Add code to clear finished seq in seq_id_to_seq_group |
|
0 commit comments