Skip to content

Commit

Permalink
gcore/gdal_priv_templates.hpp: use SSE4.1 code path with AVX and Neon
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Nov 10, 2024
1 parent 504c8c9 commit ef167ea
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions gcore/gdal_priv_templates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ static inline void GDALCopyXMMToInt64(const __m128i xmm, void *pDest)
#include <tmmintrin.h>
#endif

-#if __SSE4_1__
+#if defined(__SSE4_1__) || defined(__AVX__)
#include <smmintrin.h>
#endif

Expand All @@ -627,7 +627,7 @@ inline void GDALCopy4Words(const float *pValueIn, GByte *const pValueOut)

__m128i xmm_i = _mm_cvttps_epi32(xmm);

-#if __SSSE3__
+#if defined(__SSSE3__) || defined(USE_NEON_OPTIMIZATIONS)
xmm_i = _mm_shuffle_epi8(
xmm_i, _mm_cvtsi32_si128(0 | (4 << 8) | (8 << 16) | (12 << 24)));
#else
Expand Down Expand Up @@ -671,7 +671,7 @@ inline void GDALCopy4Words(const float *pValueIn, GUInt16 *const pValueOut)

__m128i xmm_i = _mm_cvttps_epi32(xmm);

-#if __SSE4_1__
+#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
xmm_i = _mm_packus_epi32(xmm_i, xmm_i); // Pack int32 to uint16
#else
// Translate to int16 range because _mm_packus_epi32 is SSE4.1 only
Expand Down Expand Up @@ -742,7 +742,7 @@ inline void GDALCopy8Words(const float *pValueIn, GUInt16 *const pValueOut)
__m128i xmm_i = _mm_cvttps_epi32(xmm);
__m128i xmm1_i = _mm_cvttps_epi32(xmm1);

-#if __SSE4_1__
+#if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
xmm_i = _mm_packus_epi32(xmm_i, xmm1_i); // Pack int32 to uint16
#else
// Translate to int16 range because _mm_packus_epi32 is SSE4.1 only
Expand Down

0 comments on commit ef167ea

Please sign in to comment.