Skip to content

Commit

Permalink
Add SSE4 optimized SHA256
Browse files Browse the repository at this point in the history
Adds SHA256 hashing optimizations for architectures supporting SSE4, which should lead to ~50% speedups in SHA256 on supported hardware (~5% faster synchronization and block validation).

- bitcoin/bitcoin@16240f4

Use --enable-experimental-asm in configure to enable SSE4.
  • Loading branch information
DeckerSU committed Nov 13, 2019
1 parent 3b0bad8 commit 9c22593
Show file tree
Hide file tree
Showing 6 changed files with 1,604 additions and 17 deletions.
11 changes: 11 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,16 @@ AC_ARG_ENABLE([glibc-back-compat],
[use_glibc_compat=$enableval],
[use_glibc_compat=no])

# Optional feature switch: --enable-experimental-asm stores the value given on
# the command line in experimental_asm; it defaults to "no" when not passed.
AC_ARG_ENABLE([experimental-asm],
[AS_HELP_STRING([--enable-experimental-asm],
[Enable experimental assembly routines (default is no)])],
[experimental_asm=$enableval],
[experimental_asm=no])

# When the switch is "yes", define the EXPERIMENTAL_ASM preprocessor symbol so
# that sources can compile in the experimental assembly routines.
if test "x$experimental_asm" = xyes; then
AC_DEFINE(EXPERIMENTAL_ASM, 1, [Define this symbol to build in experimental assembly routines])
fi

AC_ARG_ENABLE([zmq],
[AS_HELP_STRING([--disable-zmq],
[disable ZMQ notifications])],
Expand Down Expand Up @@ -1030,6 +1040,7 @@ AM_CONDITIONAL([USE_COMPARISON_TOOL],[test x$use_comparison_tool != xno])
AM_CONDITIONAL([USE_COMPARISON_TOOL_REORG_TESTS],[test x$use_comparison_tool_reorg_test != xno])
AM_CONDITIONAL([GLIBC_BACK_COMPAT],[test x$use_glibc_compat = xyes])
AM_CONDITIONAL([HARDEN],[test x$use_hardening = xyes])
AM_CONDITIONAL([EXPERIMENTAL_ASM],[test x$experimental_asm = xyes])

AC_DEFINE(CLIENT_VERSION_MAJOR, _CLIENT_VERSION_MAJOR, [Major version])
AC_DEFINE(CLIENT_VERSION_MINOR, _CLIENT_VERSION_MINOR, [Minor version])
Expand Down
4 changes: 4 additions & 0 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,10 @@ crypto_libbitcoin_crypto_a_SOURCES = \
crypto/verus_hash.h \
crypto/verus_hash.cpp

# Add the SSE4 SHA-256 source to the crypto library only when the
# EXPERIMENTAL_ASM automake conditional is true.
if EXPERIMENTAL_ASM
crypto_libbitcoin_crypto_a_SOURCES += crypto/sha256_sse4.cpp
endif

if ENABLE_MINING
EQUIHASH_TROMP_SOURCES = \
pow/tromp/equi_miner.h \
Expand Down
77 changes: 68 additions & 9 deletions src/crypto/sha256.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,22 @@
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include "crypto/sha256.h"

#include "crypto/common.h"

#include <assert.h>
#include <string.h>
#include <stdexcept>

// Forward declaration of the SSE4 transform. It is only visible on x86-64
// builds that define EXPERIMENTAL_ASM; <cpuid.h> is included under the same
// condition because SHA256AutoDetect() calls __get_cpuid in that configuration.
#if defined(EXPERIMENTAL_ASM) && (defined(__x86_64__) || defined(__amd64__))
#include <cpuid.h>
namespace sha256_sse4
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
} // namespace sha256_sse4
#endif

// Internal implementation code.
namespace
{
Expand Down Expand Up @@ -44,9 +54,10 @@ void inline Initialize(uint32_t* s)
s[7] = 0x5be0cd19ul;
}

/** Perform one SHA-256 transformation, processing a 64-byte chunk. */
void Transform(uint32_t* s, const unsigned char* chunk)
/** Perform a number of SHA-256 transformations, processing 64-byte chunks. */
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
{
while (blocks--) {
uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;

Expand Down Expand Up @@ -126,11 +137,59 @@ void Transform(uint32_t* s, const unsigned char* chunk)
s[5] += f;
s[6] += g;
s[7] += h;
chunk += 64;
}
}

} // namespace sha256

/** Signature shared by every SHA-256 transform: (state, input, number of 64-byte blocks). */
using TransformType = void (*)(uint32_t*, const unsigned char*, size_t);

/**
 * Run a transform implementation against fixed test vectors.
 *
 * Three checks are made, in order:
 *   1. processing zero blocks leaves the state equal to the initial state;
 *   2. one block holding the padded empty string yields the first known digest;
 *   3. two blocks holding 64 spaces plus padding yield the second known digest.
 *
 * Both inputs are stored with one extra leading byte and handed to the
 * transform at offset 1, so the data pointer is deliberately not the start of
 * the array (the "unaligned" case).
 *
 * @param tr  transform under test
 * @return true when all three checks pass, false at the first mismatch
 */
bool SelfTest(TransformType tr) {
    // Leading 0 is the offset byte; 0x80 starts the padding, the rest is zero.
    static const unsigned char kEmptyPadded[65] = {0, 0x80};
    // Leading 0 is the offset byte; then 64 spaces, 0x80, zero fill, and a
    // trailing length field whose last two bytes are 2, 0.
    static const unsigned char kSpacesPadded[129] = {
        0,
        32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
        32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
    };
    static const uint32_t kInitialState[8] = {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul};
    static const uint32_t kEmptyDigest[8] = {0xe3b0c442ul, 0x98fc1c14ul, 0x9afbf4c8ul, 0x996fb924ul, 0x27ae41e4ul, 0x649b934cul, 0xa495991bul, 0x7852b855ul};
    static const uint32_t kSpacesDigest[8] = {0xce4153b0ul, 0x147c2a86ul, 0x3ed4298eul, 0xe0676bc8ul, 0x79fc77a1ul, 0x2abe1f49ul, 0xb2b055dful, 0x1069523eul};

    uint32_t state[8];
    memcpy(state, kInitialState, sizeof(state));

    // Zero blocks: the transform must not touch the state.
    tr(state, nullptr, 0);
    if (memcmp(state, kInitialState, sizeof(state)) != 0) {
        return false;
    }

    // Padded empty string; the state still equals the initial state here.
    tr(state, kEmptyPadded + 1, 1);
    if (memcmp(state, kEmptyDigest, sizeof(state)) != 0) {
        return false;
    }

    // 64 spaces plus padding, processed as two blocks from a fresh state.
    memcpy(state, kInitialState, sizeof(state));
    tr(state, kSpacesPadded + 1, 2);
    return memcmp(state, kSpacesDigest, sizeof(state)) == 0;
}

// Currently selected transform implementation. Starts out as the generic
// sha256::Transform and may be repointed by SHA256AutoDetect().
TransformType Transform = sha256::Transform;

} // namespace

/**
 * Pick the SHA-256 transform to use and self-test it.
 *
 * On x86-64 builds with EXPERIMENTAL_ASM defined, CPUID leaf 1 is queried and
 * the SSE4 transform is installed when bit 19 of ECX is set (the SSE4.1
 * feature flag per the Intel SDM). In every other case the transform already
 * stored in Transform is kept. The selected transform is then checked with
 * SelfTest() inside an assert.
 *
 * @return "sse4" when the SSE4 transform was installed, otherwise "standard"
 */
std::string SHA256AutoDetect()
{
    const char* selected = "standard";

#if defined(EXPERIMENTAL_ASM) && (defined(__x86_64__) || defined(__amd64__))
    uint32_t eax, ebx, ecx, edx;
    const bool cpuid_ok = __get_cpuid(1, &eax, &ebx, &ecx, &edx);
    if (cpuid_ok && ((ecx >> 19) & 1) != 0) {
        Transform = sha256_sse4::Transform;
        selected = "sse4";
    }
#endif

    // Whichever implementation ended up selected must pass the known vectors.
    assert(SelfTest(Transform));
    return selected;
}

////// SHA-256

Expand All @@ -148,14 +207,14 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len)
memcpy(buf + bufsize, data, 64 - bufsize);
bytes += 64 - bufsize;
data += 64 - bufsize;
sha256::Transform(s, buf);
Transform(s, buf, 1);
bufsize = 0;
}
while (end >= data + 64) {
// Process full chunks directly from the source.
sha256::Transform(s, data);
bytes += 64;
data += 64;
if (end - data >= 64) {
size_t blocks = (end - data) / 64;
Transform(s, data, blocks);
data += 64 * blocks;
bytes += 64 * blocks;
}
if (end > data) {
// Fill the buffer with what remains.
Expand Down
21 changes: 13 additions & 8 deletions src/crypto/sha256.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2014 The Bitcoin Core developers
// Copyright (c) 2014-2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

Expand All @@ -7,26 +7,31 @@

#include <stdint.h>
#include <stdlib.h>
#include <string>

/** A hasher class for SHA-256. */
class CSHA256
{
public:
// Size in bytes of the hash buffer taken by the Finalize* methods.
static const size_t OUTPUT_SIZE = 32;

private:
uint32_t s[8];
unsigned char buf[64];
size_t bytes;
void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE], bool enforce_compression);
public:
CSHA256();
// Feed len bytes from data into the hasher; returns *this so calls can be chained.
CSHA256& Write(const unsigned char* data, size_t len);
// NOTE(review): presumably pads the input and writes the digest to hash; the definition is not shown here.
void Finalize(unsigned char hash[OUTPUT_SIZE]);
// Convenience overload: forwards to the two-argument version with enforce_compression = true.
void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE]) {
FinalizeNoPadding(hash, true);
};
// NOTE(review): presumably restores the initial state; the definition is not shown here.
CSHA256& Reset();

private:
// State words handed to the transform function by Write().
uint32_t s[8];
// Holds a partial 64-byte chunk between Write() calls.
unsigned char buf[64];
// Running count of bytes consumed by Write().
size_t bytes;
void FinalizeNoPadding(unsigned char hash[OUTPUT_SIZE], bool enforce_compression);
};

/** Autodetect the best available SHA256 implementation.
* Returns the name of the implementation.
*/
std::string SHA256AutoDetect();

#endif // BITCOIN_CRYPTO_SHA256_H
Loading

0 comments on commit 9c22593

Please sign in to comment.