Skip to content

Commit

Permalink
Added new kernel 4ic_deinterleave_8i_x2
Browse files Browse the repository at this point in the history
Implemented generic and SSE2 protokernels
  • Loading branch information
dkozel committed Aug 5, 2020
1 parent 91e5d07 commit 5e45b61
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 0 deletions.
127 changes: 127 additions & 0 deletions kernels/volk/volk_4ic_deinterleave_8i_x2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/* -*- c++ -*- */
/*
* Copyright 2020 Free Software Foundation, Inc.
*
* This file is part of GNU Radio
*
* GNU Radio is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU Radio is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Radio; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street,
* Boston, MA 02110-1301, USA.
*/

/*!
* \page volk_4ic_deinterleave_8i_x2
*
* \b Overview
*
* Deinterleaves the complex 4-bit integer vector into I & Q vector data
* and converts them to 8-bit integers.
*
* <b> Dispatcher Prototype</b>
* \code
* void volk_4ic_deinterleave_8i_x2(int8_t* iBuffer, int8t_t* qBuffer, const int8_t*
* complexVector, unsigned int num_points) \endcode
*
* \b Inputs
* \li complexVector: The complex input vector.
* \li num_points: The number of complex data values to be deinterleaved.
*
* \b Outputs
* \li iBuffer: The I buffer output data.
* \li qBuffer: The Q buffer output data.
*
* \b Example
* \code
* int N = 10000;
*
* volk_4ic_deinterleave_8i_x2();
*
* volk_free(x);
* \endcode
*
*/

#ifndef INCLUDED_volk_4ic_deinterleave_8i_x2_a_H
#define INCLUDED_volk_4ic_deinterleave_8i_x2_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_GENERIC

static inline void volk_4ic_deinterleave_8i_x2_generic(int8_t* iBuffer,
int8_t* qBuffer,
const int8_t* complexVector,
unsigned int num_points)
{
const int8_t* complexVectorPtr = complexVector;
int8_t* iBufferPtr = iBuffer;
int8_t* qBufferPtr = qBuffer;
for (unsigned int i = 0; i < num_points; i++) {
*iBufferPtr++ = (int8_t)(*complexVectorPtr) >> 4;
*qBufferPtr++ = ((int8_t)(*complexVectorPtr++) << 4) >> 4;
}
}

#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

static inline void volk_4ic_deinterleave_8i_x2_u_sse2(int8_t* iBuffer,
int8_t* qBuffer,
const int8_t* complexVector,
unsigned int num_points)
{
// SSE2 algorithm was written by Andrey Semashev, licensed as CC-BY-SA
// https://stackoverflow.com/questions/63200053/deinterleve-vector-of-nibbles-using-simd
const __m128i mask = _mm_set1_epi32(0x0F0F0F0F);
const __m128i signed_max = _mm_set1_epi32(0x07070707);

int8_t* complexVectorPtr = complexVector;
int8_t* iBufferPtr = iBuffer;
int8_t* qBufferPtr = qBuffer;

const unsigned int num_blocks = num_points / 16;
for (int block_index = 0; block_index < num_blocks; block_index++) {
// Load and deinterleave input half-bytes
__m128i input_even = _mm_loadu_si128((__m128i*)complexVectorPtr);
__m128i input_odd = _mm_srli_epi32(input_even, 4);
complexVectorPtr += 16;

input_even = _mm_and_si128(input_even, mask);
input_odd = _mm_and_si128(input_odd, mask);

// Get the sign bits
__m128i sign_even = _mm_cmpgt_epi8(input_even, signed_max);
__m128i sign_odd = _mm_cmpgt_epi8(input_odd, signed_max);

// Combine sign bits with deinterleaved input
input_even = _mm_or_si128(input_even, _mm_andnot_si128(mask, sign_even));
input_odd = _mm_or_si128(input_odd, _mm_andnot_si128(mask, sign_odd));

// Store the results
_mm_storeu_si128((__m128i*)iBufferPtr, input_even);
_mm_storeu_si128((__m128i*)qBufferPtr, input_odd);
iBufferPtr += 16;
qBufferPtr += 16;
}

const int remainder = num_points % 16;
volk_4ic_deinterleave_8i_x2_generic(
iBufferPtr, qBufferPtr, complexVectorPtr, remainder);
}

#endif /* LV_HAVE_SSE2 */
#endif /* INCLUDED_volk_4ic_deinterleave_8i_x2_u_H */
1 change: 1 addition & 0 deletions lib/kernel_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
QA(VOLK_INIT_TEST(volk_8ic_x2_s32f_multiply_conjugate_32fc, test_params))
QA(VOLK_INIT_TEST(volk_8i_convert_16i, test_params))
QA(VOLK_INIT_TEST(volk_8i_s32f_convert_32f, test_params))
QA(VOLK_INIT_TEST(volk_4ic_deinterleave_8i_x2, test_params))
QA(VOLK_INIT_TEST(volk_32fc_s32fc_multiply_32fc, test_params))
QA(VOLK_INIT_TEST(volk_32f_s32f_multiply_32f, test_params))
QA(VOLK_INIT_TEST(volk_32f_s32f_add_32f, test_params))
Expand Down

0 comments on commit 5e45b61

Please sign in to comment.