1616 tensor_model_parallel_all_gather )
1717from vllm .logger import init_logger
1818from vllm .platforms import _Backend , current_platform
19- from vllm .utils .jsontree import json_map_leaves
2019
2120logger = init_logger (__name__ )
2221
@@ -136,15 +135,16 @@ def resolve_visual_encoder_outputs(
136135 feature_select_strategy: Defines how to select the hidden states
137136 from each layer.
138137 """
139- if feature_select_strategy is not None :
140- select_features = _get_vision_feature_selector (feature_select_strategy )
141- encoder_outputs = json_map_leaves (select_features , encoder_outputs )
142-
143138 if select_layers is None :
144139 if not isinstance (encoder_outputs , torch .Tensor ):
145140 raise ValueError ("Expected only a single encoder output when "
146141 "`select_layers` is not provided" )
147142
143+ if feature_select_strategy is not None :
144+ select_features = _get_vision_feature_selector (
145+ feature_select_strategy )
146+ encoder_outputs = select_features (encoder_outputs )
147+
148148 if post_layer_norm is not None :
149149 return post_layer_norm (encoder_outputs )
150150
@@ -168,6 +168,10 @@ def resolve_visual_encoder_outputs(
168168 for layer_idx in select_layers
169169 ]
170170
171+ if feature_select_strategy is not None :
172+ select_features = _get_vision_feature_selector (feature_select_strategy )
173+ hs_pool = [select_features (hs ) for hs in hs_pool ]
174+
171175 # Apply post-norm on the final hidden state if we are using it
172176 uses_last_layer = select_layers [- 1 ] in (max_possible_layers - 1 , - 1 )
173177 if post_layer_norm is not None and uses_last_layer :
0 commit comments