Skip to content

Commit

Permalink
Add XOP aware ChaCha
Browse files Browse the repository at this point in the history
ChaCha is about 50% faster using XOP for the rotates on AMD machines
  • Loading branch information
noloader committed Oct 24, 2018
1 parent b4c4c5a commit ed4d57c
Showing 1 changed file with 18 additions and 2 deletions.
20 changes: 18 additions & 2 deletions chacha-simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
# include <tmmintrin.h>
#endif

#ifdef __XOP__
# include <ammintrin.h>
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
# include <arm_neon.h>
#endif
Expand All @@ -47,24 +51,36 @@ ANONYMOUS_NAMESPACE_BEGIN
/// \brief Rotate each 32-bit lane of a 128-bit vector to the left
/// \tparam R the rotate count, in bits
/// \param val the vector holding four 32-bit lanes
/// \returns a vector in which every 32-bit lane of val is rotated left by R bits
/// \details When __XOP__ is defined the rotate is performed by the
///   _mm_roti_epi32 intrinsic. Otherwise it is built from a left shift,
///   a right shift and a bitwise OR.
template <unsigned int R>
inline __m128i RotateLeft(const __m128i val)
{
#ifndef __XOP__
    // Bits shifted out at the top of each lane re-enter at the bottom.
    const __m128i upper = _mm_slli_epi32(val, R);
    const __m128i lower = _mm_srli_epi32(val, 32-R);
    return _mm_or_si128(upper, lower);
#else
    return _mm_roti_epi32(val, R);
#endif
}

#if defined(__SSSE3__)
// Rotates by a whole number of bytes only move bytes around inside each
// 32-bit lane, so with SSSE3 they can be done with one byte shuffle
// instead of two shifts and an OR.

/// \brief Rotate each 32-bit lane of a 128-bit vector left by 8 bits
/// \param val the vector holding four 32-bit lanes
/// \returns a vector in which every 32-bit lane of val is rotated left by 8 bits
template <>
inline __m128i RotateLeft<8>(const __m128i val)
{
#ifdef __XOP__
    // An explicit specialization has no template parameter in scope,
    // so the rotate count must be written as a literal.
    return _mm_roti_epi32(val, 8);
#else
    // _mm_set_epi8 lists byte selectors from byte 15 down to byte 0.
    // Within each 4-byte lane, result byte 0 is taken from source byte 3
    // and result bytes 1..3 are taken from source bytes 0..2.
    const __m128i mask = _mm_set_epi8(14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3);
    return _mm_shuffle_epi8(val, mask);
#endif
}

/// \brief Rotate each 32-bit lane of a 128-bit vector left by 16 bits
/// \param val the vector holding four 32-bit lanes
/// \returns a vector in which every 32-bit lane of val is rotated left by 16 bits
template <>
inline __m128i RotateLeft<16>(const __m128i val)
{
#ifdef __XOP__
    // Literal count; see RotateLeft<8>.
    return _mm_roti_epi32(val, 16);
#else
    // Within each 4-byte lane the two 16-bit halves trade places:
    // result bytes 0,1 come from source bytes 2,3 and vice versa.
    const __m128i mask = _mm_set_epi8(13,12,15,14, 9,8,11,10, 5,4,7,6, 1,0,3,2);
    return _mm_shuffle_epi8(val, mask);
#endif
}
#endif  // __SSSE3__

#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE

Expand Down

0 comments on commit ed4d57c

Please sign in to comment.