From 5e45b6158131e06e3190b71cf54bc5d11fe40281 Mon Sep 17 00:00:00 2001 From: Derek Kozel Date: Tue, 4 Aug 2020 22:30:17 +0100 Subject: [PATCH] Added new kernel 4ic_deinterleave_8i_x2 Implemented generic and SSE2 protokernels --- kernels/volk/volk_4ic_deinterleave_8i_x2.h | 127 +++++++++++++++++++++ lib/kernel_tests.h | 1 + 2 files changed, 128 insertions(+) create mode 100644 kernels/volk/volk_4ic_deinterleave_8i_x2.h diff --git a/kernels/volk/volk_4ic_deinterleave_8i_x2.h b/kernels/volk/volk_4ic_deinterleave_8i_x2.h new file mode 100644 index 000000000..1ed73d596 --- /dev/null +++ b/kernels/volk/volk_4ic_deinterleave_8i_x2.h @@ -0,0 +1,127 @@ +/* -*- c++ -*- */ +/* + * Copyright 2020 Free Software Foundation, Inc. + * + * This file is part of GNU Radio + * + * GNU Radio is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * GNU Radio is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Radio; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, + * Boston, MA 02110-1301, USA. + */ + +/*! + * \page volk_4ic_deinterleave_8i_x2 + * + * \b Overview + * + * Deinterleaves the complex 4-bit integer vector into I & Q vector data + * and converts them to 8-bit integers. + * + * Dispatcher Prototype + * \code + * void volk_4ic_deinterleave_8i_x2(int8_t* iBuffer, int8t_t* qBuffer, const int8_t* + * complexVector, unsigned int num_points) \endcode + * + * \b Inputs + * \li complexVector: The complex input vector. + * \li num_points: The number of complex data values to be deinterleaved. + * + * \b Outputs + * \li iBuffer: The I buffer output data. + * \li qBuffer: The Q buffer output data. + * + * \b Example + * \code + * int N = 10000; + * + * volk_4ic_deinterleave_8i_x2(); + * + * volk_free(x); + * \endcode + * + */ + +#ifndef INCLUDED_volk_4ic_deinterleave_8i_x2_a_H +#define INCLUDED_volk_4ic_deinterleave_8i_x2_a_H + +#include +#include + +#ifdef LV_HAVE_GENERIC + +static inline void volk_4ic_deinterleave_8i_x2_generic(int8_t* iBuffer, + int8_t* qBuffer, + const int8_t* complexVector, + unsigned int num_points) +{ + const int8_t* complexVectorPtr = complexVector; + int8_t* iBufferPtr = iBuffer; + int8_t* qBufferPtr = qBuffer; + for (unsigned int i = 0; i < num_points; i++) { + *iBufferPtr++ = (int8_t)(*complexVectorPtr) >> 4; + *qBufferPtr++ = ((int8_t)(*complexVectorPtr++) << 4) >> 4; + } +} + +#endif /* LV_HAVE_GENERIC */ + +#ifdef LV_HAVE_SSE2 +#include + +static inline void volk_4ic_deinterleave_8i_x2_u_sse2(int8_t* iBuffer, + int8_t* qBuffer, + const int8_t* complexVector, + unsigned int num_points) +{ + // SSE2 algorithm was written by Andrey Semashev, licensed as CC-BY-SA + // https://stackoverflow.com/questions/63200053/deinterleve-vector-of-nibbles-using-simd + const __m128i mask = _mm_set1_epi32(0x0F0F0F0F); + const __m128i signed_max = _mm_set1_epi32(0x07070707); + + int8_t* complexVectorPtr = complexVector; + int8_t* iBufferPtr = iBuffer; + int8_t* qBufferPtr = qBuffer; + + const unsigned int num_blocks = num_points / 16; + for (int block_index = 0; block_index < num_blocks; block_index++) { + // Load and deinterleave input half-bytes + __m128i input_even = _mm_loadu_si128((__m128i*)complexVectorPtr); + __m128i input_odd = _mm_srli_epi32(input_even, 4); + complexVectorPtr += 16; + + input_even = _mm_and_si128(input_even, mask); + input_odd = _mm_and_si128(input_odd, mask); + + // Get the sign bits + __m128i sign_even = _mm_cmpgt_epi8(input_even, signed_max); + __m128i sign_odd = _mm_cmpgt_epi8(input_odd, signed_max); + + // Combine sign bits with deinterleaved input + input_even = _mm_or_si128(input_even, _mm_andnot_si128(mask, sign_even)); + input_odd = _mm_or_si128(input_odd, _mm_andnot_si128(mask, sign_odd)); + + // Store the results + _mm_storeu_si128((__m128i*)iBufferPtr, input_even); + _mm_storeu_si128((__m128i*)qBufferPtr, input_odd); + iBufferPtr += 16; + qBufferPtr += 16; + } + + const int remainder = num_points % 16; + volk_4ic_deinterleave_8i_x2_generic( + iBufferPtr, qBufferPtr, complexVectorPtr, remainder); +} + +#endif /* LV_HAVE_SSE2 */ +#endif /* INCLUDED_volk_4ic_deinterleave_8i_x2_u_H */ diff --git a/lib/kernel_tests.h b/lib/kernel_tests.h index de340f45e..eb265e153 100644 --- a/lib/kernel_tests.h +++ b/lib/kernel_tests.h @@ -143,6 +143,7 @@ std::vector init_test_list(volk_test_params_t test_params) QA(VOLK_INIT_TEST(volk_8ic_x2_s32f_multiply_conjugate_32fc, test_params)) QA(VOLK_INIT_TEST(volk_8i_convert_16i, test_params)) QA(VOLK_INIT_TEST(volk_8i_s32f_convert_32f, test_params)) + QA(VOLK_INIT_TEST(volk_4ic_deinterleave_8i_x2, test_params)) QA(VOLK_INIT_TEST(volk_32fc_s32fc_multiply_32fc, test_params)) QA(VOLK_INIT_TEST(volk_32f_s32f_multiply_32f, test_params)) QA(VOLK_INIT_TEST(volk_32f_s32f_add_32f, test_params))