Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-927] Enable AVX512 in Binary length calculation for native ColumnartoRow (#924)
Browse files Browse the repository at this point in the history

* Fix AVX512 code of calculating binary length

* Fix clang format

* Enable NATIVE_AVX512 In C code
  • Loading branch information
zhixingheyi-tian authored May 24, 2022
1 parent c81f8af commit 10605fb
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 26 deletions.
1 change: 1 addition & 0 deletions native-sql-engine/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ if(NATIVE_AVX512)
list(APPEND SPARK_COLUMNAR_PLUGIN_SRCS operators/columnar_to_row_converter_avx512.cc)
set(NATIVE_AVX512_FLAG "-march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${NATIVE_AVX512_FLAG}")
add_definitions(-DNATIVE_AVX512)
else()
list(APPEND SPARK_COLUMNAR_PLUGIN_SRCS operators/columnar_to_row_converter.cc)
endif()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,35 +137,33 @@ arrow::Status ColumnarToRowConverter::Init(
if (arrow::is_binary_like(array->type_id())) {
auto binary_array = std::static_pointer_cast<arrow::BinaryArray>(array);
using offset_type = typename arrow::BinaryType::offset_type;
// offset_type length;
const offset_type* offsetarray = binary_array->raw_value_offsets();
// __m256i x7_8x = _mm256_load_si256((__m256i*)x_7);
// __m256i x8_8x = _mm256_load_si256((__m256i*)x_8);
__m256i x7_8x = _mm256_load_si256((__m256i*)x_7);
__m256i x8_8x = _mm256_load_si256((__m256i*)x_8);
int32_t j = 0;
int32_t* length_data = lengths_.data();
//
// __m256i offsetarray_1_8x;
// if (j + 16 < num_rows_)
// {
// offsetarray_1_8x = _mm256_load_si256((__m256i*)&offsetarray[j]);
// }
// for (j; j + 16 < num_rows_; j += 8) {
// __m256i offsetarray_8x = offsetarray_1_8x;
// offsetarray_1_8x = _mm256_load_si256((__m256i*)&offsetarray[j+8]);
//
// __m256i length_8x =
// _mm256_alignr_epi32(offsetarray_8x,offsetarray_1_8x,0x1); length_8x =
// _mm256_sub_epi32(length_8x, offsetarray_8x);
//
// __m256i reminder_8x = _mm256_and_si256(length_8x, x7_8x);
// reminder_8x = _mm256_sub_epi32(x8_8x,reminder_8x);
// reminder_8x = _mm256_and_si256(reminder_8x,x7_8x);
// __m256i dst_length_8x = _mm256_loadu_si256((__m256i*)length_data);
// dst_length_8x = _mm256_add_epi32(dst_length_8x, reminder_8x);
// _mm256_storeu_si256((__m256i*)length_data,dst_length_8x);
// length_data+=8;
// _mm_prefetch(&offsetarray[j+(128+128)/sizeof(offset_type)],_MM_HINT_T0);
// }

__m256i offsetarray_1_8x;
if (j + 16 < num_rows_) {
offsetarray_1_8x = _mm256_loadu_si256((__m256i*)&offsetarray[j]);
}
for (j; j + 16 < num_rows_; j += 8) {
__m256i offsetarray_8x = offsetarray_1_8x;
offsetarray_1_8x = _mm256_loadu_si256((__m256i*)&offsetarray[j + 8]);

__m256i length_8x = _mm256_alignr_epi32(offsetarray_1_8x, offsetarray_8x, 0x1);
length_8x = _mm256_sub_epi32(length_8x, offsetarray_8x);

__m256i reminder_8x = _mm256_and_si256(length_8x, x7_8x);
reminder_8x = _mm256_sub_epi32(x8_8x, reminder_8x);
reminder_8x = _mm256_and_si256(reminder_8x, x7_8x);
reminder_8x = _mm256_add_epi32(reminder_8x, length_8x);
__m256i dst_length_8x = _mm256_loadu_si256((__m256i*)length_data);
dst_length_8x = _mm256_add_epi32(dst_length_8x, reminder_8x);
_mm256_storeu_si256((__m256i*)length_data, dst_length_8x);
length_data += 8;
_mm_prefetch(&offsetarray[j + (128 + 128) / sizeof(offset_type)], _MM_HINT_T0);
}

for (j; j < num_rows_; j++) {
offset_type length = offsetarray[j + 1] - offsetarray[j];
Expand Down

0 comments on commit 10605fb

Please sign in to comment.