Skip to content

Commit b4bac1c

Browse files
authored
[GPU] Fix access dimension error in dynamic shape (#32626)
### Description of the issue (symptom, root cause, how it was resolved) The regression was observed in the recent memory-reset bugfix PR. The issue is a get_dims() failure that occurs when calling the convolution feature in dynamic shape. Solution: The fix integrates the logic from the existing get_conv_channel_count() utility to correctly and safely determine the channel dimension of the convolution's input/output under dynamic conditions, preventing the dimension-access failure. #### Reproduction steps and snapshot (if applicable; do not attach for customer models) ##### E2E python tools/llm_bench/benchmark.py -m ov-share-05.sclab.intel.com/cv_bench_cache/latest_models_llm/qwen2-vl-7b-instruct/pytorch/ov/OV_FP16-4BIT_DEFAULT -d GPU.1 -mc 1 -ic 256 -n 3 -pf frameworks.ai.openvino.llm.prompts/32_1024/qwen2-vl-7b-instruct.jsonl ###### Benchmark_app benchmark_app -d GPU.1 --hint none -nireq 1 -niter 1 -m ov-share-05.sclab.intel.com/cv_bench_cache/latest_models_llm/qwen2-vl-7b-instruct/pytorch/ov/OV_FP16-4BIT_DEFAULT/openvino_vision_embeddings_model.xml -data_shape hidden_states[1,1176] #### Checklist - [x] Is it a proper fix? Yes - [x] Did you include a test case for this fix, if necessary? - [x] Did you review existing tests that can be extended to cover this scenario? Which test did you review? mem_reset_test.cpp ### Tickets: - *CVS-175613* --------- Signed-off-by: hyunback <hyunback.kim@intel.com>
1 parent cb1ec75 commit b4bac1c

File tree

4 files changed

+56
-42
lines changed

4 files changed

+56
-42
lines changed

src/plugins/intel_gpu/src/graph/include/layout_optimizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ class reorder_factory {
8282
std::map<reorder_cache_key, std::shared_ptr<reorder>> _cached_reorders;
8383
};
8484

85+
int64_t get_convolution_channel_count(const convolution_node& conv_node, const layout& layout, bool is_input);
86+
8587
class layout_optimizer {
8688
public:
8789
enum class optimization_attributes_type {

src/plugins/intel_gpu/src/graph/layout_optimizer.cpp

Lines changed: 20 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,21 @@ std::pair<std::shared_ptr<primitive>, bool> reorder_factory::get_weights_reorder
112112
}
113113
}
114114

115+
int64_t cldnn::get_convolution_channel_count(const convolution_node& conv_node, const layout& layout, bool is_input) {
116+
auto channel_count = layout.get_partial_shape()[1].is_static() ? layout.get_partial_shape()[1].get_length() : -1;
117+
if (channel_count == -1) {
118+
auto weights_layout = conv_node.weights().get_output_layout();
119+
if (weights_layout.is_static()) {
120+
const auto& shape = weights_layout.get_partial_shape();
121+
if (is_input)
122+
channel_count = shape[conv_node.get_groups() > 1 ? 2 : 1].get_length();
123+
else
124+
channel_count = shape[conv_node.get_groups() > 1 ? 1 : 0].get_length();
125+
}
126+
}
127+
return channel_count;
128+
}
129+
115130
bool layout_optimizer::is_format_supported(program_node& node, format::type fmt) {
116131
if (node.is_type<fully_connected>() && fmt == format::byxf)
117132
return false;
@@ -250,24 +265,9 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
250265
return false;
251266
};
252267

253-
auto get_conv_channel_count = [](const convolution_node& conv_node, const layout& layout, bool is_input) -> int64_t {
254-
auto channel_count = layout.get_partial_shape()[1].is_static() ? layout.get_partial_shape()[1].get_length() : -1;
255-
if (channel_count == -1) {
256-
auto weights_layout = conv_node.weights().get_output_layout();
257-
if (weights_layout.is_static()) {
258-
const auto& shape = weights_layout.get_partial_shape();
259-
if (is_input)
260-
channel_count = shape[conv_node.get_groups() > 1 ? 2 : 1].get_length();
261-
else
262-
channel_count = shape[conv_node.get_groups() > 1 ? 1 : 0].get_length();
263-
}
264-
}
265-
return channel_count;
266-
};
267-
268268
auto& conv_node = next.as<convolution>();
269-
auto in_channel_count = get_conv_channel_count(conv_node, prev_output_layout, true);
270-
auto out_channel_count = get_conv_channel_count(conv_node, next_output_layout, false);
269+
auto in_channel_count = get_convolution_channel_count(conv_node, prev_output_layout, true);
270+
auto out_channel_count = get_convolution_channel_count(conv_node, next_output_layout, false);
271271

272272
if ((prev.is_dynamic() || next.is_dynamic()) && (in_channel_count == -1 || out_channel_count == -1))
273273
return false;
@@ -276,7 +276,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
276276
if (next.get_preferred_impl_type() == impl_types::onednn &&
277277
((fmt_prev == format::byxf && fmt_next == format::byxf) ||
278278
(fmt_prev == format::bfyx && fmt_next == format::byxf &&
279-
(prev_dt == data_types::f16 && get_conv_channel_count(conv_node, next.get_input_layout(0), false) <= 8))) &&
279+
(prev_dt == data_types::f16 && get_convolution_channel_count(conv_node, next.get_input_layout(0), false) <= 8))) &&
280280
is_input_reorder(prev, next))
281281
return true;
282282

@@ -989,22 +989,9 @@ void layout_optimizer::set_onednn_dyn_conv_preferred_format(convolution_node& no
989989
return (rank <= 4) ? cldnn::format::byxf : cldnn::format::bzyxf;
990990
};
991991

992-
// Helper function to get channel count safely
993-
auto get_channel_count = [](const layout& layout) -> int64_t {
994-
return layout.get_partial_shape()[1].is_static() ? layout.get_partial_shape()[1].get_length() : -1;
995-
};
996-
997992
// Get channel counts once
998-
int64_t input_channels = get_channel_count(input_layout);
999-
int64_t output_channels = get_channel_count(output_layout);
1000-
auto weights_layout = node.weights().get_output_layout();
1001-
// Try to get channel counts from weight layout
1002-
if (input_channels == -1 && weights_layout.is_static()) {
1003-
input_channels = weights_layout.get_partial_shape()[node.get_groups() > 1 ? 2 : 1].get_length();
1004-
}
1005-
if (output_channels == -1 && weights_layout.is_static()) {
1006-
output_channels = weights_layout.get_partial_shape()[node.get_groups() > 1 ? 1 : 0].get_length();
1007-
}
993+
auto input_channels = get_convolution_channel_count(node, input_layout, true);
994+
auto output_channels = get_convolution_channel_count(node, output_layout, false);
1008995

1009996
if (i8_u8_input) {
1010997
// Set default input format for i8/u8 input

src/plugins/intel_gpu/src/graph/primitive_inst.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,13 @@ bool primitive_inst::need_reset_output_memory() const {
606606
const bool is_user_onednn_impl = user_inst->get_node().get_preferred_impl_type() == impl_types::onednn;
607607
const bool is_user_conv = user_inst->get_node().is_type<convolution>();
608608
if (is_user_conv && is_user_onednn_impl) {
609+
auto& conv_node = user_inst->get_node().as<convolution>();
609610
auto& output_layout = _impl_params->get_output_layout(0);
611+
auto in_channel_count = get_convolution_channel_count(conv_node, output_layout, true);
612+
// If the channel count is dynamic, we cannot verify feature alignment,
613+
// so we conservatively do the reset and return true for this condition.
614+
if (in_channel_count == -1)
615+
return true;
610616

611617
auto get_feature_block_size = [](format fmt) {
612618
int feature_block_size = 1;
@@ -623,7 +629,7 @@ bool primitive_inst::need_reset_output_memory() const {
623629
auto feature_block_size = get_feature_block_size(fmt);
624630
// if layout is single blocked and feature size is not aligned with the blocking size, need to reset output so that we can guarantee zero-filling
625631
// NOTE: We may improve this logic to avoid reset if we are sure that it is not "corrupted" by other layers.
626-
if (output_layout.feature() % feature_block_size != 0) {
632+
if (in_channel_count % feature_block_size != 0) {
627633
return true;
628634
}
629635
}

src/plugins/intel_gpu/tests/unit/module_tests/mem_reset_test.cpp

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ const std::string no_bias = "";
3131

3232
struct mem_reset_params {
3333
ov::Dimension::value_type in_channel;
34+
bool is_dynamic;
3435
bool need_reset;
3536
};
3637

@@ -43,20 +44,32 @@ TEST_P(mem_reset_test, need_reset_output_memory_test) {
4344
return;
4445

4546
tests::random_generator rg(GET_SUITE_NAME);
46-
ov::PartialShape input_pshape = {1, p.in_channel, 64, 64};
47+
48+
ov::PartialShape target_pshape = {1, p.in_channel, 64, 64};
49+
ov::PartialShape input_pshape;
50+
51+
if (p.is_dynamic) {
52+
for (size_t i = 0; i < target_pshape.size(); ++i) {
53+
input_pshape.emplace_back(ov::Dimension());
54+
}
55+
input_pshape[1] = target_pshape[1];
56+
} else {
57+
input_pshape = target_pshape;
58+
}
59+
4760
ov::PartialShape weights_pshape = {16, p.in_channel, 3, 3};
4861
layout in_layout{ input_pshape, data_types::f16, format::bfyx };
4962
layout weights_layout{ weights_pshape, data_types::f16, format::bfyx };
50-
auto input_data = rg.generate_random_1d<ov::float16>(in_layout.count(), -1, 1);
51-
auto input_mem = engine.allocate_memory(in_layout);
63+
auto input_data = rg.generate_random_1d<ov::float16>(ov::shape_size(target_pshape.get_shape()), -1, 1);
64+
auto input_mem = engine.allocate_memory({ target_pshape, data_types::f16, format::bfyx });
5265
set_values(input_mem, input_data);
5366

5467
auto weights_data = rg.generate_random_1d<ov::float16>(weights_layout.count(), -1, 1);
5568
auto weights_mem = engine.allocate_memory(weights_layout);
5669
set_values(weights_mem, weights_data);
5770

58-
auto input1 = input_layout("input1", input_mem->get_layout());
59-
auto input2 = input_layout("input2", input_mem->get_layout());
71+
auto input1 = input_layout("input1", in_layout);
72+
auto input2 = input_layout("input2", in_layout);
6073
auto weights = data("weights", weights_mem);
6174
auto eltw = eltwise("eltwise", {input_info("input1"), input_info("input2")}, eltwise_mode::sum);
6275
auto eltw_reorder = reorder("reorder1", input_info("eltwise"), format::b_fs_yx_fsv16, data_types::f16 );
@@ -87,13 +100,19 @@ TEST_P(mem_reset_test, need_reset_output_memory_test) {
87100

88101
auto outputs_test_blocked = network_test_blocked.execute();
89102

90-
auto reorder_inst = network_test_blocked.get_primitive("reorder1");
103+
// An additional reorder is added and fused when force_implementations is enabled in dynamic shape
104+
auto target_primitive_id = p.is_dynamic ? "reorder1_0_reorder_2" : "reorder1";
105+
auto reorder_inst = network_test_blocked.get_primitive(target_primitive_id);
91106
ASSERT_TRUE(PrimitiveInstTestHelper::need_reset_output_memory(reorder_inst) == p.need_reset);
92107
}
93108

94109
INSTANTIATE_TEST_SUITE_P(smoke, mem_reset_test,
95110
testing::Values(
96-
mem_reset_params{ 9, true }, // If tensor is not packed(not aligned to 16), need_reset_output_memory == true
97-
mem_reset_params{ 16, false }
111+
// static
112+
mem_reset_params{ 9, false, true }, // If tensor is not packed(not aligned to 16), need_reset_output_memory == true
113+
mem_reset_params{ 16, false, false },
114+
// dynamic
115+
mem_reset_params{ 9, true, true },
116+
mem_reset_params{ 16, true, false }
98117
)
99118
);

0 commit comments

Comments
 (0)