Skip to content

Commit

Permalink
[GPU] Enable bf16 state conversions
Browse files: browse the repository at this point in the history
  • Loading branch information
Lyamin-Roman committed Nov 4, 2024
1 parent 4d8ff86 commit 1a0866c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 11 deletions.
4 changes: 3 additions & 1 deletion src/plugins/intel_gpu/src/plugin/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@ void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_p

// For state conversions
CASE(ov::element::f32, ov::element::f32, float, float);
CASE(ov::element::f16, ov::element::f16, ov::float16, ov::float16);
CASE(ov::element::f32, ov::element::f16, float, ov::float16);
CASE(ov::element::f16, ov::element::f32, ov::float16, float);
CASE(ov::element::f16, ov::element::f16, ov::float16, ov::float16);
CASE(ov::element::bf16, ov::element::f32, ov::bfloat16, float);
CASE(ov::element::bf16, ov::element::f16, ov::bfloat16, ov::float16);

OPENVINO_THROW("[GPU] Unsupported element types combination for copy: ", src_et, " -> ", dst_et);
}
Expand Down
10 changes: 0 additions & 10 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,16 +212,6 @@ void ExecutionConfig::apply_debug_options(const cldnn::device_info& info) {
set_property(ov::hint::dynamic_quantization_group_size(debug_config->dynamic_quantize_group_size));
}

int KVCacheCompression = 0;
if (const auto env_var = std::getenv("KVCacheCompression")) {
std::istringstream ss(env_var);
ss >> KVCacheCompression;
}

if (KVCacheCompression == 1) {
set_property(ov::hint::kv_cache_precision(ov::element::i8));
}

GPU_DEBUG_IF(debug_config->use_kv_cache_compression != -1) {
GPU_DEBUG_IF(debug_config->use_kv_cache_compression == 1) {
set_property(ov::hint::kv_cache_precision(ov::element::i8));
Expand Down

0 comments on commit 1a0866c

Please sign in to comment.