Skip to content

Commit 9fef921

Browse files
authored
Merge branch 'main' into the-great-cleaning
2 parents b00b07e + 5e9ec59 commit 9fef921

File tree

3 files changed: +13 additions, −17 deletions

src/transformers/models/qwen3_vl/modeling_qwen3_vl.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -528,8 +528,7 @@ def forward(
528528
class Qwen3VLModelOutputWithPast(ModelOutput):
529529
r"""
530530
past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
531-
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
532-
`(batch_size, num_heads, sequence_length, embed_size_per_head)`)
531+
It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).
533532
534533
Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
535534
`past_key_values` input) to speed up sequential decoding.
@@ -538,7 +537,7 @@ class Qwen3VLModelOutputWithPast(ModelOutput):
538537
"""
539538

540539
last_hidden_state: Optional[torch.FloatTensor] = None
541-
past_key_values: Optional[list[torch.FloatTensor]] = None
540+
past_key_values: Optional[Cache] = None
542541
hidden_states: Optional[tuple[torch.FloatTensor]] = None
543542
attentions: Optional[tuple[torch.FloatTensor]] = None
544543
rope_deltas: Optional[torch.LongTensor] = None
@@ -1255,8 +1254,7 @@ class Qwen3VLCausalLMOutputWithPast(ModelOutput):
12551254
logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
12561255
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
12571256
past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
1258-
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
1259-
`(batch_size, num_heads, sequence_length, embed_size_per_head)`)
1257+
It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).
12601258
12611259
Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
12621260
`past_key_values` input) to speed up sequential decoding.
@@ -1266,7 +1264,7 @@ class Qwen3VLCausalLMOutputWithPast(ModelOutput):
12661264

12671265
loss: Optional[torch.FloatTensor] = None
12681266
logits: Optional[torch.FloatTensor] = None
1269-
past_key_values: Optional[list[torch.FloatTensor]] = None
1267+
past_key_values: Optional[Cache] = None
12701268
hidden_states: Optional[tuple[torch.FloatTensor]] = None
12711269
attentions: Optional[tuple[torch.FloatTensor]] = None
12721270
rope_deltas: Optional[torch.LongTensor] = None
@@ -1322,7 +1320,7 @@ def forward(
13221320
input_ids: torch.LongTensor = None,
13231321
attention_mask: Optional[torch.Tensor] = None,
13241322
position_ids: Optional[torch.LongTensor] = None,
1325-
past_key_values: Optional[list[torch.FloatTensor]] = None,
1323+
past_key_values: Optional[Cache] = None,
13261324
inputs_embeds: Optional[torch.FloatTensor] = None,
13271325
labels: Optional[torch.LongTensor] = None,
13281326
pixel_values: Optional[torch.Tensor] = None,

src/transformers/models/qwen3_vl/modular_qwen3_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ def forward(
11561156
input_ids: torch.LongTensor = None,
11571157
attention_mask: Optional[torch.Tensor] = None,
11581158
position_ids: Optional[torch.LongTensor] = None,
1159-
past_key_values: Optional[list[torch.FloatTensor]] = None,
1159+
past_key_values: Optional[Cache] = None,
11601160
inputs_embeds: Optional[torch.FloatTensor] = None,
11611161
labels: Optional[torch.LongTensor] = None,
11621162
pixel_values: Optional[torch.Tensor] = None,

src/transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def forward(
348348
position_embeddings: tuple[torch.Tensor, torch.Tensor],
349349
attention_mask: Optional[torch.Tensor] = None,
350350
position_ids: Optional[torch.LongTensor] = None,
351-
past_key_values: Optional[tuple[torch.Tensor]] = None,
351+
past_key_values: Optional[Cache] = None,
352352
cache_position: Optional[torch.LongTensor] = None,
353353
**kwargs: Unpack[FlashAttentionKwargs],
354354
) -> torch.FloatTensor:
@@ -366,7 +366,7 @@ def forward(
366366
use_cache (`bool`, *optional*):
367367
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
368368
(see `past_key_values`).
369-
past_key_values (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
369+
past_key_values (`Cache`, *optional*): cached past key and value projection states
370370
cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
371371
Indices depicting the position of the input sequence tokens in the sequence.
372372
position_embeddings (`tuple[torch.FloatTensor, torch.FloatTensor]`, *optional*):
@@ -1011,8 +1011,7 @@ def _deepstack_process(
10111011
class Qwen3VLMoeModelOutputWithPast(ModelOutput):
10121012
r"""
10131013
past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
1014-
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
1015-
`(batch_size, num_heads, sequence_length, embed_size_per_head)`)
1014+
It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).
10161015
10171016
Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
10181017
`past_key_values` input) to speed up sequential decoding.
@@ -1021,7 +1020,7 @@ class Qwen3VLMoeModelOutputWithPast(ModelOutput):
10211020
"""
10221021

10231022
last_hidden_state: Optional[torch.FloatTensor] = None
1024-
past_key_values: Optional[list[torch.FloatTensor]] = None
1023+
past_key_values: Optional[Cache] = None
10251024
hidden_states: Optional[tuple[torch.FloatTensor]] = None
10261025
attentions: Optional[tuple[torch.FloatTensor]] = None
10271026
rope_deltas: Optional[torch.LongTensor] = None
@@ -1398,8 +1397,7 @@ class Qwen3VLMoeCausalLMOutputWithPast(ModelOutput):
13981397
logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
13991398
Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
14001399
past_key_values (`Cache`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
1401-
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
1402-
`(batch_size, num_heads, sequence_length, embed_size_per_head)`)
1400+
It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).
14031401
14041402
Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
14051403
`past_key_values` input) to speed up sequential decoding.
@@ -1409,7 +1407,7 @@ class Qwen3VLMoeCausalLMOutputWithPast(ModelOutput):
14091407

14101408
loss: Optional[torch.FloatTensor] = None
14111409
logits: Optional[torch.FloatTensor] = None
1412-
past_key_values: Optional[list[torch.FloatTensor]] = None
1410+
past_key_values: Optional[Cache] = None
14131411
hidden_states: Optional[tuple[torch.FloatTensor]] = None
14141412
attentions: Optional[tuple[torch.FloatTensor]] = None
14151413
rope_deltas: Optional[torch.LongTensor] = None
@@ -1465,7 +1463,7 @@ def forward(
14651463
input_ids: torch.LongTensor = None,
14661464
attention_mask: Optional[torch.Tensor] = None,
14671465
position_ids: Optional[torch.LongTensor] = None,
1468-
past_key_values: Optional[list[torch.FloatTensor]] = None,
1466+
past_key_values: Optional[Cache] = None,
14691467
inputs_embeds: Optional[torch.FloatTensor] = None,
14701468
labels: Optional[torch.LongTensor] = None,
14711469
pixel_values: Optional[torch.Tensor] = None,

0 commit comments

Comments (0)