Skip to content

Commit

Permalink
Remove ARIA SIMD code (GH #1235)
Browse files Browse the repository at this point in the history
ARIA SIMD code existed to perform an XOR at the end of encryption and decryption. It was a lot of work for what little it saved on the final XOR.
Worse, the final XOR seemed to be causing the problems described in GH #1235. Once we unrolled the XOR and applied it while building outBlock, the GH #1235 issue went away.
  • Loading branch information
noloader committed Sep 30, 2023
1 parent 8d3e357 commit 5250ab2
Show file tree
Hide file tree
Showing 11 changed files with 28 additions and 277 deletions.
1 change: 0 additions & 1 deletion Filelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ arc4.cpp
arc4.h
ariatab.cpp
aria.cpp
aria_simd.cpp
aria.h
argnames.h
arm_simd.h
Expand Down
4 changes: 0 additions & 4 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -1635,10 +1635,6 @@ NOSTD_CXXFLAGS=$(filter-out -stdlib=%,$(filter-out -std=%,$(CXXFLAGS)))
aes_armv4.o : aes_armv4.S
$(CXX) $(strip $(CPPFLAGS) $(ASFLAGS) $(NOSTD_CXXFLAGS) $(CRYPTOGAMS_ARM_THUMB_FLAG) -c) $<

# SSSE3 or NEON available
aria_simd.o : aria_simd.cpp
$(CXX) $(strip $(CPPFLAGS) $(CXXFLAGS) $(ARIA_FLAG) -c) $<

# SSE, NEON or POWER7 available
blake2s_simd.o : blake2s_simd.cpp
$(CXX) $(strip $(CPPFLAGS) $(CXXFLAGS) $(BLAKE2S_FLAG) -c) $<
Expand Down
4 changes: 0 additions & 4 deletions GNUmakefile-cross
Original file line number Diff line number Diff line change
Expand Up @@ -975,10 +975,6 @@ aes_armv4.o : aes_armv4.S
cpu-features.o: cpu-features.h cpu-features.c
$(CXX) -x c $(strip $(CPPFLAGS) $(NOSTD_CXXFLAGS) -c) cpu-features.c

# SSSE3 or NEON available
aria_simd.o : aria_simd.cpp
$(CXX) $(strip $(CPPFLAGS) $(CXXFLAGS) $(ARIA_FLAG) -c) $<

# SSE, NEON or POWER7 available
blake2s_simd.o : blake2s_simd.cpp
$(CXX) $(strip $(CPPFLAGS) $(CXXFLAGS) $(BLAKE2S_FLAG) -c) $<
Expand Down
4 changes: 2 additions & 2 deletions TestScripts/cryptest-android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ do
# In the past we looked for the vector loads, stores and shifts using vld and friends.
# It looks like objdump changed its output format on Android after Clang, so we need
# to check for statements like eor v0.16b, v2.16b, v0.16b nowadays.
count=$(${OBJDUMP} --disassemble aria_simd.o 2>&1 | grep -c -E 'vld|vst|vshl|vshr|veor|v0\.|v1\.|v2\.|v3\.|v4\.|v5\.|v6\.|v7\.')
count=$(${OBJDUMP} --disassemble chacha_simd.o 2>&1 | grep -c -E 'vld|vst|vshl|vshr|veor|v0\.|v1\.|v2\.|v3\.|v4\.|v5\.|v6\.|v7\.')
if [[ "${count}" -gt 64 ]]
then
echo "${platform} : NEON ==> SUCCESS" >> "${TMPDIR}/build.log"
Expand All @@ -136,7 +136,7 @@ do
# In the past we looked for the vector loads, stores and shifts using vld and friends.
# It looks like objdump changed its output format on Android after Clang, so we need
# to check for statements like eor v0.16b, v2.16b, v0.16b nowadays.
count=$(${OBJDUMP} --disassemble aria_simd.o 2>&1 | grep -c -E 'vld|vst|vshl|vshr|veor|v0\.|v1\.|v2\.|v3\.|v4\.|v5\.|v6\.|v7\.')
count=$(${OBJDUMP} --disassemble chacha_simd.o 2>&1 | grep -c -E 'vld|vst|vshl|vshr|veor|v0\.|v1\.|v2\.|v3\.|v4\.|v5\.|v6\.|v7\.')
if [[ "${count}" -gt 64 ]]
then
echo "${platform} : ASIMD ==> SUCCESS" >> "${TMPDIR}/build.log"
Expand Down
4 changes: 2 additions & 2 deletions TestScripts/cryptest-ios.sh
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ do
then

# Test NEON code generation
count=$(otool -tV aria_simd.o 2>&1 | grep -c -E 'vld|vst|vshl|vshr|veor')
count=$(otool -tV chacha_simd.o 2>&1 | grep -c -E 'vld|vst|vshl|vshr|veor')
if [[ "${count}" -gt 64 ]]
then
echo "${platform} : NEON ==> SUCCESS" >> "${TMPDIR}/build.log"
Expand All @@ -122,7 +122,7 @@ do
then

# Test ASIMD code generation
count=$(otool -tV aria_simd.o 2>&1 | grep -c -E 'ldr[[:space:]]*q|str[[:space:]]*q|shl.4|shr.4|eor.16')
count=$(otool -tV chacha_simd.o 2>&1 | grep -c -E 'ldr[[:space:]]*q|str[[:space:]]*q|shl.4|shr.4|eor.16')
if [[ "${count}" -gt 64 ]]
then
echo "${platform} : ASIMD ==> SUCCESS" >> "${TMPDIR}/build.log"
Expand Down
2 changes: 1 addition & 1 deletion TestScripts/cryptest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1645,7 +1645,7 @@ if [[ ("$HAVE_DISASS" -ne 0 && ("$IS_ARM32" -ne 0 || "$IS_ARM64" -ne 0)) ]]; the

TEST_LIST+=("ARM NEON code generation")

OBJFILE=aria_simd.o; rm -f "$OBJFILE" 2>/dev/null
OBJFILE=chacha_simd.o; rm -f "$OBJFILE" 2>/dev/null
CXX="${CXX}" CXXFLAGS="$RELEASE_CXXFLAGS" "$MAKE" "${MAKEARGS[@]}" $OBJFILE 2>&1 | tee -a "$TEST_RESULTS"

COUNT=0
Expand Down
84 changes: 21 additions & 63 deletions aria.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,6 @@
#include "misc.h"
#include "cpu.h"

#if CRYPTOPP_SSE2_INTRIN_AVAILABLE
# define CRYPTOPP_ENABLE_ARIA_SSE2_INTRINSICS 1
#endif

#if CRYPTOPP_SSSE3_AVAILABLE
# define CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS 1
#endif

NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(ARIATab)

Expand Down Expand Up @@ -97,15 +89,6 @@ inline void ARIA_FE(word32 t[4]) {
ARIA_MM(t[0],t[1],t[2],t[3]);
}

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
extern void ARIA_UncheckedSetKey_Schedule_NEON(byte* rk, word32* ws, unsigned int keylen);
extern void ARIA_ProcessAndXorBlock_NEON(const byte* xorBlock, byte* outblock, const byte *rk, word32 *t);
#endif

#if (CRYPTOPP_SSSE3_AVAILABLE)
extern void ARIA_ProcessAndXorBlock_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t);
#endif

// n-bit right shift of Y XORed to X
template <unsigned int N>
inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
Expand Down Expand Up @@ -190,38 +173,29 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam

w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3];

#if CRYPTOPP_ARM_NEON_AVAILABLE
if (HasNEON())
{
ARIA_UncheckedSetKey_Schedule_NEON(rk, m_w, keylen);
}
else
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
ARIA_GSRK<19>(w0, w1, rk + 0);
ARIA_GSRK<19>(w1, w2, rk + 16);
ARIA_GSRK<19>(w2, w3, rk + 32);
ARIA_GSRK<19>(w3, w0, rk + 48);
ARIA_GSRK<31>(w0, w1, rk + 64);
ARIA_GSRK<31>(w1, w2, rk + 80);
ARIA_GSRK<31>(w2, w3, rk + 96);
ARIA_GSRK<31>(w3, w0, rk + 112);
ARIA_GSRK<67>(w0, w1, rk + 128);
ARIA_GSRK<67>(w1, w2, rk + 144);
ARIA_GSRK<67>(w2, w3, rk + 160);
ARIA_GSRK<67>(w3, w0, rk + 176);
ARIA_GSRK<97>(w0, w1, rk + 192);

if (keylen > 16)
{
ARIA_GSRK<19>(w0, w1, rk + 0);
ARIA_GSRK<19>(w1, w2, rk + 16);
ARIA_GSRK<19>(w2, w3, rk + 32);
ARIA_GSRK<19>(w3, w0, rk + 48);
ARIA_GSRK<31>(w0, w1, rk + 64);
ARIA_GSRK<31>(w1, w2, rk + 80);
ARIA_GSRK<31>(w2, w3, rk + 96);
ARIA_GSRK<31>(w3, w0, rk + 112);
ARIA_GSRK<67>(w0, w1, rk + 128);
ARIA_GSRK<67>(w1, w2, rk + 144);
ARIA_GSRK<67>(w2, w3, rk + 160);
ARIA_GSRK<67>(w3, w0, rk + 176);
ARIA_GSRK<97>(w0, w1, rk + 192);

if (keylen > 16)
ARIA_GSRK<97>(w1, w2, rk + 208);
ARIA_GSRK<97>(w2, w3, rk + 224);

if (keylen > 24)
{
ARIA_GSRK<97>(w1, w2, rk + 208);
ARIA_GSRK<97>(w2, w3, rk + 224);

if (keylen > 24)
{
ARIA_GSRK< 97>(w3, w0, rk + 240);
ARIA_GSRK<109>(w0, w1, rk + 256);
}
ARIA_GSRK< 97>(w3, w0, rk + 240);
ARIA_GSRK<109>(w0, w1, rk + 256);
}
}

Expand Down Expand Up @@ -293,22 +267,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
rk = ARIA_KXL(rk, t); ARIA_FO(t); rk = ARIA_KXL(rk, t); ARIA_FE(t);
rk = ARIA_KXL(rk, t); ARIA_FO(t); rk = ARIA_KXL(rk, t);

#if CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
if (HasSSSE3())
{
ARIA_ProcessAndXorBlock_SSSE3(xorBlock, outBlock, rk, t);
return;
}
else
#endif // CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
{
ARIA_ProcessAndXorBlock_NEON(xorBlock, outBlock, rk, t);
return;
}
else
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#if (CRYPTOPP_LITTLE_ENDIAN)
{
outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ) ^ rk[ 3];
Expand Down
194 changes: 0 additions & 194 deletions aria_simd.cpp

This file was deleted.

Loading

0 comments on commit 5250ab2

Please sign in to comment.