Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions include/mbedtls/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,16 @@
*/
//#define MBEDTLS_SHA256_SMALLER

/**
* \def MBEDTLS_SHA512_SMALLER
*
* Enable an implementation of SHA-512 that has lower ROM footprint but also
* lower performance.
*
* Uncomment to enable the smaller implementation of SHA512.
*/
//#define MBEDTLS_SHA512_SMALLER

/**
* \def MBEDTLS_THREADING_ALT
*
Expand Down
89 changes: 53 additions & 36 deletions library/sha512.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,15 @@
}
#endif /* PUT_UINT64_BE */

#if defined(MBEDTLS_SHA512_SMALLER)
static void sha512_put_uint64_be( uint64_t n, unsigned char *b, uint8_t i )
{
PUT_UINT64_BE(n, b, i);
}
#else
#define sha512_put_uint64_be PUT_UINT64_BE
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we make it a function all the time and count on compilers to inline when compiling for performance?

#endif /* MBEDTLS_SHA512_SMALLER */

void mbedtls_sha512_init( mbedtls_sha512_context *ctx )
{
SHA512_VALIDATE( ctx != NULL );
Expand Down Expand Up @@ -219,7 +228,7 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
{
int i;
uint64_t temp1, temp2, W[80];
uint64_t A, B, C, D, E, F, G, H;
uint64_t A[8];

SHA512_VALIDATE_RET( ctx != NULL );
SHA512_VALIDATE_RET( (const unsigned char *)data != NULL );
Expand All @@ -244,6 +253,28 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
(d) += temp1; (h) = temp1 + temp2; \
} while( 0 )

for( i = 0; i < 8; i++ )
A[i] = ctx->state[i];

#if defined(MBEDTLS_SHA512_SMALLER)
for( i = 0; i < 80; i++ )
{
if( i < 16 )
{
GET_UINT64_BE( W[i], data, i << 3 );
}
else
{
W[i] = S1(W[i - 2]) + W[i - 7] +
S0(W[i - 15]) + W[i - 16];
}

P( A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[i], K[i] );

temp1 = A[7]; A[7] = A[6]; A[6] = A[5]; A[5] = A[4]; A[4] = A[3];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can save a few more bytes with

        temp1 = A[7];
        for( k = 1; k < 8; k++ )
            A[k] = A[k - 1];
        A[0] = temp1;

On the other hand, I tried removing the rotation of A and running

        P( A[(80-i+0)%8], A[(80-i+1)%8], A[(80-i+2)%8], A[(80-i+3)%8], A[(80-i+4)%8], A[(80-i+5)%8], A[(80-i+6)%8], A[(80-i+7)%8],
           W[i], K[i] );

but that has almost the same code size as your version. I wonder if there's more to gain there by tweaking P and its auxiliary macros to use temporaries for certain calculations.

A[3] = A[2]; A[2] = A[1]; A[1] = A[0]; A[0] = temp1;
}
#else /* MBEDTLS_SHA512_SMALLER */
for( i = 0; i < 16; i++ )
{
GET_UINT64_BE( W[i], data, i << 3 );
Expand All @@ -255,37 +286,23 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx,
S0(W[i - 15]) + W[i - 16];
}

A = ctx->state[0];
B = ctx->state[1];
C = ctx->state[2];
D = ctx->state[3];
E = ctx->state[4];
F = ctx->state[5];
G = ctx->state[6];
H = ctx->state[7];
i = 0;

do
{
P( A, B, C, D, E, F, G, H, W[i], K[i] ); i++;
P( H, A, B, C, D, E, F, G, W[i], K[i] ); i++;
P( G, H, A, B, C, D, E, F, W[i], K[i] ); i++;
P( F, G, H, A, B, C, D, E, W[i], K[i] ); i++;
P( E, F, G, H, A, B, C, D, W[i], K[i] ); i++;
P( D, E, F, G, H, A, B, C, W[i], K[i] ); i++;
P( C, D, E, F, G, H, A, B, W[i], K[i] ); i++;
P( B, C, D, E, F, G, H, A, W[i], K[i] ); i++;
P( A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[i], K[i] ); i++;
P( A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[i], K[i] ); i++;
P( A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[i], K[i] ); i++;
P( A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[i], K[i] ); i++;
P( A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[i], K[i] ); i++;
P( A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[i], K[i] ); i++;
P( A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[i], K[i] ); i++;
P( A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], W[i], K[i] ); i++;
}
while( i < 80 );
#endif /* MBEDTLS_SHA512_SMALLER */

ctx->state[0] += A;
ctx->state[1] += B;
ctx->state[2] += C;
ctx->state[3] += D;
ctx->state[4] += E;
ctx->state[5] += F;
ctx->state[6] += G;
ctx->state[7] += H;
for( i = 0; i < 8; i++ )
ctx->state[i] += A[i];

return( 0 );
}
Expand Down Expand Up @@ -403,26 +420,26 @@ int mbedtls_sha512_finish_ret( mbedtls_sha512_context *ctx,
| ( ctx->total[1] << 3 );
low = ( ctx->total[0] << 3 );

PUT_UINT64_BE( high, ctx->buffer, 112 );
PUT_UINT64_BE( low, ctx->buffer, 120 );
sha512_put_uint64_be( high, ctx->buffer, 112 );
sha512_put_uint64_be( low, ctx->buffer, 120 );

if( ( ret = mbedtls_internal_sha512_process( ctx, ctx->buffer ) ) != 0 )
return( ret );

/*
* Output final state
*/
PUT_UINT64_BE( ctx->state[0], output, 0 );
PUT_UINT64_BE( ctx->state[1], output, 8 );
PUT_UINT64_BE( ctx->state[2], output, 16 );
PUT_UINT64_BE( ctx->state[3], output, 24 );
PUT_UINT64_BE( ctx->state[4], output, 32 );
PUT_UINT64_BE( ctx->state[5], output, 40 );
sha512_put_uint64_be( ctx->state[0], output, 0 );
sha512_put_uint64_be( ctx->state[1], output, 8 );
sha512_put_uint64_be( ctx->state[2], output, 16 );
sha512_put_uint64_be( ctx->state[3], output, 24 );
sha512_put_uint64_be( ctx->state[4], output, 32 );
sha512_put_uint64_be( ctx->state[5], output, 40 );

if( ctx->is384 == 0 )
{
PUT_UINT64_BE( ctx->state[6], output, 48 );
PUT_UINT64_BE( ctx->state[7], output, 56 );
sha512_put_uint64_be( ctx->state[6], output, 48 );
sha512_put_uint64_be( ctx->state[7], output, 56 );
}

return( 0 );
Expand Down
3 changes: 3 additions & 0 deletions library/version_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,9 @@ static const char *features[] = {
#if defined(MBEDTLS_SHA256_SMALLER)
"MBEDTLS_SHA256_SMALLER",
#endif /* MBEDTLS_SHA256_SMALLER */
#if defined(MBEDTLS_SHA512_SMALLER)
"MBEDTLS_SHA512_SMALLER",
#endif /* MBEDTLS_SHA512_SMALLER */
#if defined(MBEDTLS_THREADING_ALT)
"MBEDTLS_THREADING_ALT",
#endif /* MBEDTLS_THREADING_ALT */
Expand Down
8 changes: 8 additions & 0 deletions programs/test/query_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,14 @@ int query_config( const char *config )
}
#endif /* MBEDTLS_SHA256_SMALLER */

#if defined(MBEDTLS_SHA512_SMALLER)
if( strcmp( "MBEDTLS_SHA512_SMALLER", config ) == 0 )
{
MACRO_EXPANSION_TO_STR( MBEDTLS_SHA512_SMALLER );
return( 0 );
}
#endif /* MBEDTLS_SHA512_SMALLER */

#if defined(MBEDTLS_THREADING_ALT)
if( strcmp( "MBEDTLS_THREADING_ALT", config ) == 0 )
{
Expand Down