From 5e189b4b8d5b24454eb65b685d598b6922299769 Mon Sep 17 00:00:00 2001 From: Ben Laurie Date: Thu, 10 Jul 2014 17:47:48 +0100 Subject: [PATCH 01/19] Don't clean up uninitialised EVP_CIPHER_CTX on error (CID 483259). (cherry picked from commit c1d1b0114e9d370c30649e46182393dbfc00e20c) --- crypto/cms/cms_pwri.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/cms/cms_pwri.c b/crypto/cms/cms_pwri.c index b79612a12df49..71f2ddb49695c 100644 --- a/crypto/cms/cms_pwri.c +++ b/crypto/cms/cms_pwri.c @@ -93,9 +93,10 @@ CMS_RecipientInfo *CMS_add0_recipient_password(CMS_ContentInfo *cms, X509_ALGOR *encalg = NULL; unsigned char iv[EVP_MAX_IV_LENGTH]; int ivlen; + env = cms_get0_enveloped(cms); if (!env) - goto err; + return NULL; if (wrap_nid <= 0) wrap_nid = NID_id_alg_PWRI_KEK; From 5b9188454b70effb029741d06b64df45f078fb12 Mon Sep 17 00:00:00 2001 From: Richard Levitte Date: Sun, 13 Jul 2014 19:11:29 +0200 Subject: [PATCH 02/19] * crypto/ui/ui_lib.c: misplaced brace in switch statement. Detected by dcruette@qualitesys.com (cherry picked from commit 8b5dd340919e511137696792279f595a70ae2762) --- crypto/ui/ui_lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/ui/ui_lib.c b/crypto/ui/ui_lib.c index a8abc270642a9..167da002d24c7 100644 --- a/crypto/ui/ui_lib.c +++ b/crypto/ui/ui_lib.c @@ -916,9 +916,9 @@ int UI_set_result(UI *ui, UI_STRING *uis, const char *result) break; } } + } default: break; } - } return 0; } From 2fbd94252a119c064fc043da2b47a3510ee4b238 Mon Sep 17 00:00:00 2001 From: Peter Mosmans Date: Sun, 13 Jul 2014 18:30:07 +0100 Subject: [PATCH 03/19] Add names of GOST algorithms. PR#3440 (cherry picked from commit 924e5eda2c82d737cc5a1b9c37918aa6e34825da) --- ssl/ssl_ciph.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c index 9c7fbc302260d..c511279a5d949 100644 --- a/ssl/ssl_ciph.c +++ b/ssl/ssl_ciph.c @@ -1677,6 +1677,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_kSRP: kx="SRP"; break; + case SSL_kGOST: + kx="VKO"; + break; default: kx="unknown"; } @@ -1710,6 +1713,12 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_aSRP: au="SRP"; break; + case SSL_aGOST94: + au="GOST94"; + break; + case SSL_aGOST01: + au="GOST01"; + break; default: au="unknown"; break; @@ -1757,6 +1766,9 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_SEED: enc="SEED(128)"; break; + case SSL_eGOST2814789CNT: + enc="GOST89(256)"; + break; default: enc="unknown"; break; @@ -1779,6 +1791,12 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) case SSL_AEAD: mac="AEAD"; break; + case SSL_GOST89MAC: + mac="GOST89"; + break; + case SSL_GOST94: + mac="GOST94"; + break; default: mac="unknown"; break; From 14b5d0d029da51cb6874aa7d39d1253c43442ee2 Mon Sep 17 00:00:00 2001 From: Matt Caswell Date: Thu, 10 Jul 2014 23:47:31 +0100 Subject: [PATCH 04/19] Fixed valgrind complaint due to BN_consttime_swap reading uninitialised data. This is actually ok for this function, but initialised to zero anyway if PURIFY defined. This does have the impact of masking any *real* unitialised data reads in bn though. Patch based on approach suggested by Rich Salz. PR#3415 (cherry picked from commit 77747e2d9a5573b1dbc15e247ce18c03374c760c) --- crypto/bn/bn_lib.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c index 5461e6ee7dbee..d5a211e288b68 100644 --- a/crypto/bn/bn_lib.c +++ b/crypto/bn/bn_lib.c @@ -320,6 +320,15 @@ static BN_ULONG *bn_expand_internal(const BIGNUM *b, int words) BNerr(BN_F_BN_EXPAND_INTERNAL,ERR_R_MALLOC_FAILURE); return(NULL); } +#ifdef PURIFY + /* Valgrind complains in BN_consttime_swap because we process the whole + * array even if it's not initialised yet. This doesn't matter in that + * function - what's important is constant time operation (we're not + * actually going to use the data) + */ + memset(a, 0, sizeof(BN_ULONG)*words); +#endif + #if 1 B=b->d; /* Check if the previous number needs to be copied */ From fa2b54c83a78eaf8aa1ce652d20feec3193ede7e Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 14 Jul 2014 15:05:50 +0100 Subject: [PATCH 05/19] Use more common name for GOST key exchange. (cherry picked from commit 7aabd9c92fe6f0ea2a82869e5171dcc4518cee85) --- ssl/ssl_ciph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c index c511279a5d949..eb1acf17cd6a9 100644 --- a/ssl/ssl_ciph.c +++ b/ssl/ssl_ciph.c @@ -1678,7 +1678,7 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) kx="SRP"; break; case SSL_kGOST: - kx="VKO"; + kx="GOST"; break; default: kx="unknown"; From cdae9a58e638f3d5bb3d326aff675db431e98c7b Mon Sep 17 00:00:00 2001 From: Hubert Kario Date: Fri, 6 Jun 2014 16:48:43 +0200 Subject: [PATCH 06/19] document -nextprotoneg option in man pages Add description of the option to advertise support of Next Protocol Negotiation extension (-nextprotoneg) to man pages of s_client and s_server. PR#3444 (cherry picked from commit 7efd0e777e65eaa6c60d85b1cc5c889f872f8fc4) --- doc/apps/s_client.pod | 12 ++++++++++++ doc/apps/s_server.pod | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/doc/apps/s_client.pod b/doc/apps/s_client.pod index 1e8ba23456da0..85a17e6502d38 100644 --- a/doc/apps/s_client.pod +++ b/doc/apps/s_client.pod @@ -49,6 +49,7 @@ B B [B<-rand file(s)>] [B<-serverinfo types>] [B<-status>] +[B<-nextprotoneg protocols>] =head1 DESCRIPTION @@ -273,6 +274,17 @@ file. sends a certificate status request to the server (OCSP stapling). The server response (if any) is printed out. +=item B<-nextprotoneg protocols> + +enable Next Protocol Negotiation TLS extension and provide a list of +comma-separated protocol names that the client should advertise +support for. The list should contain most wanted protocols first. +Protocol names are printable ASCII strings, for example "http/1.1" or +"spdy/3". +Empty list of protocols is treated specially and will cause the client to +advertise support for the TLS extension but disconnect just after +reciving ServerHello with a list of server supported protocols. + =back =head1 CONNECTED COMMANDS diff --git a/doc/apps/s_server.pod b/doc/apps/s_server.pod index 7b87ad0f03830..99015cc4c3612 100644 --- a/doc/apps/s_server.pod +++ b/doc/apps/s_server.pod @@ -61,6 +61,8 @@ B B [B<-status_verbose>] [B<-status_timeout nsec>] [B<-status_url url>] +[B<-nextprotoneg protocols>] + =head1 DESCRIPTION The B command implements a generic SSL/TLS server which listens @@ -317,6 +319,14 @@ sets a fallback responder URL to use if no responder URL is present in the server certificate. Without this option an error is returned if the server certificate does not contain a responder address. +=item B<-nextprotoneg protocols> + +enable Next Protocol Negotiation TLS extension and provide a +comma-separated list of supported protocol names. +The list should contain most wanted protocols first. +Protocol names are printable ASCII strings, for example "http/1.1" or +"spdy/3". + =back =head1 CONNECTED COMMANDS From c71e37aa6c7d2ad379a37768b6599cd7904c2f9e Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Mon, 14 Jul 2014 23:59:13 +0100 Subject: [PATCH 07/19] Use case insensitive compare for servername. PR#3445 (cherry picked from commit 1c3e9a7c67ccdc5e770829fe951e5832e600d377) --- apps/s_server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/s_server.c b/apps/s_server.c index 911c360d3ebac..a399f5978fd43 100644 --- a/apps/s_server.c +++ b/apps/s_server.c @@ -767,7 +767,7 @@ static int MS_CALLBACK ssl_servername_cb(SSL *s, int *ad, void *arg) if (servername) { - if (strcmp(servername,p->servername)) + if (strcasecmp(servername,p->servername)) return p->extension_error; if (ctx2) { From d4dbabb814cbf8b886f778834609aa614f48d502 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 15 Jul 2014 12:22:49 +0100 Subject: [PATCH 08/19] Don't allow -www etc options with DTLS. The options which emulate a web server don't make sense when doing DTLS. Exit with an error if an attempt is made to use them. PR#3453 (cherry picked from commit 58a2aaeade8bdecd0f9f0df41927f7cff3012547) --- apps/s_server.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/s_server.c b/apps/s_server.c index a399f5978fd43..e0ec5fa700681 100644 --- a/apps/s_server.c +++ b/apps/s_server.c @@ -1524,6 +1524,14 @@ int MAIN(int argc, char *argv[]) sv_usage(); goto end; } +#ifndef OPENSSL_NO_DTLS1 + if (www && socket_type == SOCK_DGRAM) + { + BIO_printf(bio_err, + "Can't use -HTTP, -www or -WWW with DTLS\n"); + goto end; + } +#endif #if !defined(OPENSSL_NO_JPAKE) && !defined(OPENSSL_NO_PSK) if (jpake_secret) From 666a597ffb9bcf3ba2d49e711fcca28df91eff9d Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 15 Jul 2014 18:21:59 +0100 Subject: [PATCH 09/19] Fix DTLS certificate requesting code. Use same logic when determining when to expect a client certificate for both TLS and DTLS. PR#3452 (cherry picked from commit c8d710dc5f83d69d802f941a4cc5895eb5fe3d65) --- ssl/d1_srvr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ssl/d1_srvr.c b/ssl/d1_srvr.c index c69d44b839f59..a6e7d053cac79 100644 --- a/ssl/d1_srvr.c +++ b/ssl/d1_srvr.c @@ -616,10 +616,11 @@ int dtls1_accept(SSL *s) s->state = SSL3_ST_SR_CLNT_HELLO_C; } else { - /* could be sent for a DH cert, even if we - * have not asked for it :-) */ - ret=ssl3_get_client_certificate(s); - if (ret <= 0) goto end; + if (s->s3->tmp.cert_request) + { + ret=ssl3_get_client_certificate(s); + if (ret <= 0) goto end; + } s->init_num=0; s->state=SSL3_ST_SR_KEY_EXCH_A; } From beac6cb5aa6701dee4efb650c9e554fb485b7a13 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Tue, 15 Jul 2014 20:22:39 +0100 Subject: [PATCH 10/19] Clarify -Verify and PSK. PR#3452 (cherry picked from commit ca2015a617842fed3d36ed4dcbbf8d5e27bc5216) --- doc/apps/s_server.pod | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/apps/s_server.pod b/doc/apps/s_server.pod index 99015cc4c3612..b37f410fb9ce6 100644 --- a/doc/apps/s_server.pod +++ b/doc/apps/s_server.pod @@ -152,6 +152,9 @@ the client. With the B<-verify> option a certificate is requested but the client does not have to send one, with the B<-Verify> option the client must supply a certificate or an error occurs. +If the ciphersuite cannot request a client certificate (for example an +anonymous ciphersuite or PSK) this option has no effect. + =item B<-crl_check>, B<-crl_check_all> Check the peer certificate has not been revoked by its CA. From b11c24110cbc78b3b54c115dd87f9d99ef7c9ce7 Mon Sep 17 00:00:00 2001 From: Matt Caswell Date: Tue, 15 Jul 2014 22:47:29 +0100 Subject: [PATCH 11/19] Add Matt Caswell's fingerprint, and general update on the fingerprints file to bring it up to date Reviewed-by: Tim Hudson (cherry picked from commit 3bd548192a03142c80cf8bc68659d79dea20a738) --- doc/fingerprints.txt | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/doc/fingerprints.txt b/doc/fingerprints.txt index 4030c81fa5001..373e90d0a1e78 100644 --- a/doc/fingerprints.txt +++ b/doc/fingerprints.txt @@ -4,12 +4,11 @@ OpenSSL releases are signed with PGP/GnuPG keys. You can find the signatures in separate files in the same location you find the distributions themselves. The normal file name is the same as the distribution file, with '.asc' added. For example, the signature for -the distribution of OpenSSL 0.9.7f, openssl-0.9.7f.tar.gz, is found in -the file openssl-0.9.7f.tar.gz.asc. +the distribution of OpenSSL 1.0.1h, openssl-1.0.1h.tar.gz, is found in +the file openssl-1.0.1h.tar.gz.asc. The following is the list of fingerprints for the keys that are -currently in use (have been used since summer 2004) to sign OpenSSL -distributions: +currently in use to sign OpenSSL distributions: pub 1024D/F709453B 2003-10-20 Key fingerprint = C4CA B749 C34F 7F4C C04F DAC9 A7AF 9E78 F709 453B @@ -34,10 +33,6 @@ uid Mark Cox uid Mark Cox uid Mark Cox -pub 1024R/26BB437D 1997-04-28 - Key fingerprint = 00 C9 21 8E D1 AB 70 37 DD 67 A2 3A 0A 6F 8D A5 -uid Ralf S. Engelschall - pub 1024R/9C58A66D 1997-04-03 Key fingerprint = 13 D0 B8 9D 37 30 C3 ED AC 9C 24 7D 45 8C 17 67 uid jaenicke@openssl.org @@ -62,3 +57,7 @@ uid Bodo Moeller <3moeller@informatik.uni-hamburg.de> uid Bodo Moeller uid Bodo Moeller <3moeller@rzdspc5.informatik.uni-hamburg.de> +pub 2048R/0E604491 2013-04-30 + Key fingerprint = 8657 ABB2 60F0 56B1 E519 0839 D9C4 D26D 0E60 4491 +uid Matt Caswell + From ca818b322d549f44f7f344d4a94d8a754f85598f Mon Sep 17 00:00:00 2001 From: Matt Caswell Date: Sun, 13 Jul 2014 23:28:13 +0100 Subject: [PATCH 12/19] Disabled XTS mode in enc utility as it is not supported PR#3442 Reviewed-by: Tim Hudson Reviewed-by: Rich Salz (cherry picked from commit 2097a17c576f2395a10b05f14490688bc5f45a07) --- apps/enc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/apps/enc.c b/apps/enc.c index c6a211ba87fd6..c8cb021224e6d 100644 --- a/apps/enc.c +++ b/apps/enc.c @@ -339,6 +339,12 @@ int MAIN(int argc, char **argv) goto end; } + if (cipher && (EVP_CIPHER_mode(cipher) == EVP_CIPH_XTS_MODE)) + { + BIO_printf(bio_err, "Ciphers in XTS mode are not supported by the enc utility\n"); + goto end; + } + if (md && (dgst=EVP_get_digestbyname(md)) == NULL) { BIO_printf(bio_err,"%s is an unsupported message digest type\n",md); From 6ccd120f5f85c6fe61bcab6d635e4fdc22df4722 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Thu, 17 Jul 2014 11:25:02 +0100 Subject: [PATCH 13/19] Fix typo, add reference. PR#3456 Reviewed-by: Stephen Henson Reviewed-by: Matt Caswell (cherry picked from commit d48e78f0cf22aaddb563f4bcfccf25b1a45ac8a4) --- doc/crypto/pem.pod | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/crypto/pem.pod b/doc/crypto/pem.pod index 54414a3f6f37d..21e9fe3b98a90 100644 --- a/doc/crypto/pem.pod +++ b/doc/crypto/pem.pod @@ -450,9 +450,9 @@ byte B encoded as a set of hexadecimal digits. After this is the base64 encoded encrypted data. -The encryption key is determined using EVP_bytestokey(), using B and an +The encryption key is determined using EVP_BytesToKey(), using B and an iteration count of 1. The IV used is the value of B and *not* the IV -returned by EVP_bytestokey(). +returned by EVP_BytesToKey(). =head1 BUGS @@ -474,3 +474,7 @@ The read routines return either a pointer to the structure read or NULL if an error occurred. The write routines return 1 for success or 0 for failure. + +=head1 SEE ALSO + +L, L From 6e1e5996df318132eb4188e80faa17f64d94009a Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Thu, 17 Jul 2014 02:50:48 +0100 Subject: [PATCH 14/19] Sanity check lengths for AES wrap algorithm. Reviewed-by: Tim Hudson (cherry picked from commit d12eef15016e49fc09d6c96653c61624e032d1a3) --- crypto/evp/e_aes.c | 6 +++++- crypto/modes/wrap128.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index 8150e02f87894..d20cecaaadedd 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -2076,7 +2076,11 @@ static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; size_t rv; if (inlen % 8) - return 0; + return -1; + if (ctx->encrypt && inlen < 8) + return -1; + if (!ctx->encrypt && inlen < 16) + return -1; if (!out) { if (ctx->encrypt) diff --git a/crypto/modes/wrap128.c b/crypto/modes/wrap128.c index 18785320f29ff..c6c14cdaaa06b 100644 --- a/crypto/modes/wrap128.c +++ b/crypto/modes/wrap128.c @@ -106,7 +106,7 @@ size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, unsigned char *A, B[16], *R; size_t i, j, t; inlen -= 8; - if ((inlen & 0x7) || (inlen < 8) || (inlen > CRYPTO128_WRAP_MAX)) + if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX)) return 0; A = B; t = 6 * (inlen >> 3); From 4c05b1f8d641f3e25532aad955ed586ad614c114 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Thu, 17 Jul 2014 22:27:50 +0100 Subject: [PATCH 15/19] Make *Final work for key wrap again. Reviewed-by: Tim Hudson (cherry picked from commit 58f4698f67c33b723a9e99bed1101161a59eea73) --- crypto/evp/e_aes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index d20cecaaadedd..ae1fd0a2ed9f9 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -2075,6 +2075,8 @@ static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, { EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; size_t rv; + if (!in) + return 0; if (inlen % 8) return -1; if (ctx->encrypt && inlen < 8) @@ -2088,8 +2090,6 @@ static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, else return inlen - 8; } - if (!in) - return 0; if (ctx->encrypt) rv = CRYPTO_128_wrap(&wctx->ks.ks, wctx->iv, out, in, inlen, (block128_f)AES_encrypt); From be12cb3e24ad15343e83e826a63a9aa4a3edaea7 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sat, 19 Jul 2014 14:20:05 +0100 Subject: [PATCH 16/19] Fix documentation for RSA_set_method(3) PR#1675 Reviewed-by: Matt Caswell (cherry picked from commit 197400c3f0d617d71ad8167b52fb73046d334320) --- doc/crypto/RSA_set_method.pod | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/crypto/RSA_set_method.pod b/doc/crypto/RSA_set_method.pod index 2c963d7e5bbaa..0ef0781186514 100644 --- a/doc/crypto/RSA_set_method.pod +++ b/doc/crypto/RSA_set_method.pod @@ -125,14 +125,18 @@ the default method is used. /* sign. For backward compatibility, this is used only * if (flags & RSA_FLAG_SIGN_VER) */ - int (*rsa_sign)(int type, unsigned char *m, unsigned int m_len, - unsigned char *sigret, unsigned int *siglen, RSA *rsa); - + int (*rsa_sign)(int type, + const unsigned char *m, unsigned int m_length, + unsigned char *sigret, unsigned int *siglen, const RSA *rsa); /* verify. For backward compatibility, this is used only * if (flags & RSA_FLAG_SIGN_VER) */ - int (*rsa_verify)(int type, unsigned char *m, unsigned int m_len, - unsigned char *sigbuf, unsigned int siglen, RSA *rsa); + int (*rsa_verify)(int dtype, + const unsigned char *m, unsigned int m_length, + const unsigned char *sigbuf, unsigned int siglen, + const RSA *rsa); + /* keygen. If NULL builtin RSA key generation will be used */ + int (*rsa_keygen)(RSA *rsa, int bits, BIGNUM *e, BN_GENCB *cb); } RSA_METHOD; From c991d8ae8bf712983ba527eef43388d8bb120a03 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sun, 20 Jul 2014 14:36:49 +0200 Subject: [PATCH 17/19] Initial POWER8 support from development branch. Reviewed-by: Kurt Roeckx Reviewed-by: Tim Hudson --- Configure | 2 +- TABLE | 60 +- crypto/aes/Makefile | 2 + crypto/aes/asm/aesp8-ppc.pl | 1940 +++++++++++++++++++++++++++++++ crypto/evp/e_aes.c | 15 +- crypto/modes/Makefile | 2 + crypto/modes/asm/ghashp8-ppc.pl | 234 ++++ crypto/modes/gcm128.c | 17 + crypto/perlasm/ppc-xlate.pl | 36 + crypto/ppc_arch.h | 10 + crypto/ppccap.c | 25 +- crypto/ppccpuid.pl | 10 + crypto/sha/Makefile | 2 + crypto/sha/asm/sha512-ppc.pl | 4 +- crypto/sha/asm/sha512p8-ppc.pl | 423 +++++++ 15 files changed, 2744 insertions(+), 38 deletions(-) create mode 100755 crypto/aes/asm/aesp8-ppc.pl create mode 100755 crypto/modes/asm/ghashp8-ppc.pl create mode 100644 crypto/ppc_arch.h create mode 100755 crypto/sha/asm/sha512p8-ppc.pl diff --git a/Configure b/Configure index 3be3fee4ffdd7..13b8677413ef1 100755 --- a/Configure +++ b/Configure @@ -140,7 +140,7 @@ my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o:::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:"; my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; -my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; +my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; my $ppc32_asm=$ppc64_asm; my $no_asm=":::::::::::::::void"; diff --git a/TABLE b/TABLE index 010a0bacc2b79..92a3ec9e1f5a6 100644 --- a/TABLE +++ b/TABLE @@ -902,17 +902,17 @@ $bn_ops = BN_LLONG RC4_CHAR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = aix32 $dso_scheme = dlfcn @@ -935,17 +935,17 @@ $bn_ops = BN_LLONG RC4_CHAR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = aix32 $dso_scheme = dlfcn @@ -1001,17 +1001,17 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = aix64 $dso_scheme = dlfcn @@ -1034,17 +1034,17 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = aix64 $dso_scheme = dlfcn @@ -1463,17 +1463,17 @@ $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = osx32 $dso_scheme = dlfcn @@ -1496,17 +1496,17 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = osx64 $dso_scheme = dlfcn @@ -2189,17 +2189,17 @@ $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = osx32 $dso_scheme = dlfcn @@ -4466,17 +4466,17 @@ $bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = linux32 $dso_scheme = dlfcn @@ -4499,17 +4499,17 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = linux64 $dso_scheme = dlfcn @@ -4532,17 +4532,17 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL $cpuid_obj = ppccpuid.o ppccap.o $bn_obj = bn-ppc.o ppc-mont.o ppc64-mont.o $des_obj = -$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o +$aes_obj = aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o $bf_obj = $md5_obj = -$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o +$sha1_obj = sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o $cast_obj = $rc4_obj = $rmd160_obj = $rc5_obj = $wp_obj = $cmll_obj = -$modes_obj = +$modes_obj = ghashp8-ppc.o $engines_obj = $perlasm_scheme = linux64le $dso_scheme = dlfcn diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile index e9fa404baf426..709b1af39b7ff 100644 --- a/crypto/aes/Makefile +++ b/crypto/aes/Makefile @@ -79,6 +79,8 @@ aes-ppc.s: asm/aes-ppc.pl $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ vpaes-ppc.s: asm/vpaes-ppc.pl $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@ +aesp8-ppc.s: asm/aesp8-ppc.pl + $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ aes-parisc.s: asm/aes-parisc.pl $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl new file mode 100755 index 0000000000000..3ee8979e76255 --- /dev/null +++ b/crypto/aes/asm/aesp8-ppc.pl @@ -0,0 +1,1940 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# This module implements support for AES instructions as per PowerISA +# specification version 2.07, first implemented by POWER8 processor. +# The module is endian-agnostic in sense that it supports both big- +# and little-endian cases. Data alignment in parallelizable modes is +# handled with VSX loads and stores, which implies MSR.VSX flag being +# set. It should also be noted that ISA specification doesn't prohibit +# alignment exceptions for these instructions on page boundaries. +# Initially alignment was handled in pure AltiVec/VMX way [when data +# is aligned programmatically, which in turn guarantees exception- +# free execution], but it turned to hamper performance when vcipher +# instructions are interleaved. It's reckoned that eventual +# misalignment penalties at page boundaries are in average lower +# than additional overhead in pure AltiVec approach. + +$flavour = shift; + +if ($flavour =~ /64/) { + $SIZE_T =8; + $LRSAVE =2*$SIZE_T; + $STU ="stdu"; + $POP ="ld"; + $PUSH ="std"; + $UCMP ="cmpld"; + $SHL ="sldi"; +} elsif ($flavour =~ /32/) { + $SIZE_T =4; + $LRSAVE =$SIZE_T; + $STU ="stwu"; + $POP ="lwz"; + $PUSH ="stw"; + $UCMP ="cmplw"; + $SHL ="slwi"; +} else { die "nonsense $flavour"; } + +$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; + +$FRAME=8*$SIZE_T; +$prefix="aes_p8"; + +$sp="r1"; +$vrsave="r12"; + +######################################################################### +{{{ # Key setup procedures # +my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); +my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); +my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); + +$code.=<<___; +.machine "any" + +.text + +.align 7 +rcon: +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev +.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev +.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev +.long 0,0,0,0 ?asis +Lconsts: + mflr r0 + bcl 20,31,\$+4 + mflr $ptr #vvvvv "distance between . and rcon + addi $ptr,$ptr,-0x48 + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " + +.globl .${prefix}_set_encrypt_key +.align 5 +.${prefix}_set_encrypt_key: +Lset_encrypt_key: + mflr r11 + $PUSH r11,$LRSAVE($sp) + + li $ptr,-1 + ${UCMP}i $inp,0 + beq- Lenc_key_abort # if ($inp==0) return -1; + ${UCMP}i $out,0 + beq- Lenc_key_abort # if ($out==0) return -1; + li $ptr,-2 + cmpwi $bits,128 + blt- Lenc_key_abort + cmpwi $bits,256 + bgt- Lenc_key_abort + andi. r0,$bits,0x3f + bne- Lenc_key_abort + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + bl Lconsts + mtlr r11 + + neg r9,$inp + lvx $in0,0,$inp + addi $inp,$inp,15 # 15 is not typo + lvsr $key,0,r9 # borrow $key + li r8,0x20 + cmpwi $bits,192 + lvx $in1,0,$inp + le?vspltisb $mask,0x0f # borrow $mask + lvx $rcon,0,$ptr + le?vxor $key,$key,$mask # adjust for byte swap + lvx $mask,r8,$ptr + addi $ptr,$ptr,0x10 + vperm $in0,$in0,$in1,$key # align [and byte swap in LE] + li $cnt,8 + vxor $zero,$zero,$zero + mtctr $cnt + + ?lvsr $outperm,0,$out + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$zero,$outmask,$outperm + + blt Loop128 + addi $inp,$inp,8 + beq L192 + addi $inp,$inp,8 + b L256 + +.align 4 +Loop128: + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + bdnz Loop128 + + lvx $rcon,0,$ptr # last two round keys + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + + vperm $key,$in0,$in0,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + + addi $inp,$out,15 # 15 is not typo + addi $out,$out,0x50 + + li $rounds,10 + b Ldone + +.align 4 +L192: + lvx $tmp,0,$inp + li $cnt,4 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + vspltisb $key,8 # borrow $key + mtctr $cnt + vsububm $mask,$mask,$key # adjust the mask + +Loop192: + vperm $key,$in1,$in1,$mask # roate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vcipherlast $key,$key,$rcon + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + + vsldoi $stage,$zero,$in1,8 + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vsldoi $stage,$stage,$in0,8 + + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vsldoi $stage,$in0,$in1,8 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vperm $outtail,$stage,$stage,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + stvx $stage,0,$out + addi $out,$out,16 + + vspltw $tmp,$in0,3 + vxor $tmp,$tmp,$in1 + vsldoi $in1,$zero,$in1,12 # >>32 + vadduwm $rcon,$rcon,$rcon + vxor $in1,$in1,$tmp + vxor $in0,$in0,$key + vxor $in1,$in1,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdnz Loop192 + + li $rounds,12 + addi $out,$out,0x20 + b Ldone + +.align 4 +L256: + lvx $tmp,0,$inp + li $cnt,7 + li $rounds,14 + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $out,$out,16 + vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] + mtctr $cnt + +Loop256: + vperm $key,$in1,$in1,$mask # rotate-n-splat + vsldoi $tmp,$zero,$in0,12 # >>32 + vperm $outtail,$in1,$in1,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + vcipherlast $key,$key,$rcon + stvx $stage,0,$out + addi $out,$out,16 + + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in0,$in0,$tmp + vadduwm $rcon,$rcon,$rcon + vxor $in0,$in0,$key + vperm $outtail,$in0,$in0,$outperm # rotate + vsel $stage,$outhead,$outtail,$outmask + vmr $outhead,$outtail + stvx $stage,0,$out + addi $inp,$out,15 # 15 is not typo + addi $out,$out,16 + bdz Ldone + + vspltw $key,$in0,3 # just splat + vsldoi $tmp,$zero,$in1,12 # >>32 + vsbox $key,$key + + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + vsldoi $tmp,$zero,$tmp,12 # >>32 + vxor $in1,$in1,$tmp + + vxor $in1,$in1,$key + b Loop256 + +.align 4 +Ldone: + lvx $in1,0,$inp # redundant in aligned case + vsel $in1,$outhead,$in1,$outmask + stvx $in1,0,$inp + li $ptr,0 + mtspr 256,$vrsave + stw $rounds,0($out) + +Lenc_key_abort: + mr r3,$ptr + blr + .long 0 + .byte 0,12,0x14,1,0,0,3,0 + .long 0 +.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key + +.globl .${prefix}_set_decrypt_key +.align 5 +.${prefix}_set_decrypt_key: + $STU $sp,-$FRAME($sp) + mflr r10 + $PUSH r10,$FRAME+$LRSAVE($sp) + bl Lset_encrypt_key + mtlr r10 + + cmpwi r3,0 + bne- Ldec_key_abort + + slwi $cnt,$rounds,4 + subi $inp,$out,240 # first round key + srwi $rounds,$rounds,1 + add $out,$inp,$cnt # last round key + mtctr $rounds + +Ldeckey: + lwz r0, 0($inp) + lwz r6, 4($inp) + lwz r7, 8($inp) + lwz r8, 12($inp) + addi $inp,$inp,16 + lwz r9, 0($out) + lwz r10,4($out) + lwz r11,8($out) + lwz r12,12($out) + stw r0, 0($out) + stw r6, 4($out) + stw r7, 8($out) + stw r8, 12($out) + subi $out,$out,16 + stw r9, -16($inp) + stw r10,-12($inp) + stw r11,-8($inp) + stw r12,-4($inp) + bdnz Ldeckey + + xor r3,r3,r3 # return value +Ldec_key_abort: + addi $sp,$sp,$FRAME + blr + .long 0 + .byte 0,12,4,1,0x80,0,3,0 + .long 0 +.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key +___ +}}} +######################################################################### +{{{ # Single block en- and decrypt procedures # +sub gen_block () { +my $dir = shift; +my $n = $dir eq "de" ? "n" : ""; +my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); + +$code.=<<___; +.globl .${prefix}_${dir}crypt +.align 5 +.${prefix}_${dir}crypt: + lwz $rounds,240($key) + lis r0,0xfc00 + mfspr $vrsave,256 + li $idx,15 # 15 is not typo + mtspr 256,r0 + + lvx v0,0,$inp + neg r11,$out + lvx v1,$idx,$inp + lvsl v2,0,$inp # inpperm + le?vspltisb v4,0x0f + ?lvsl v3,0,r11 # outperm + le?vxor v2,v2,v4 + li $idx,16 + vperm v0,v0,v1,v2 # align [and byte swap in LE] + lvx v1,0,$key + ?lvsl v5,0,$key # keyperm + srwi $rounds,$rounds,1 + lvx v2,$idx,$key + addi $idx,$idx,16 + subi $rounds,$rounds,1 + ?vperm v1,v1,v2,v5 # align round key + + vxor v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + mtctr $rounds + +Loop_${dir}c: + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + addi $idx,$idx,16 + ?vperm v1,v1,v2,v5 + v${n}cipher v0,v0,v1 + lvx v1,$idx,$key + addi $idx,$idx,16 + bdnz Loop_${dir}c + + ?vperm v2,v2,v1,v5 + v${n}cipher v0,v0,v2 + lvx v2,$idx,$key + ?vperm v1,v1,v2,v5 + v${n}cipherlast v0,v0,v1 + + vspltisb v2,-1 + vxor v1,v1,v1 + li $idx,15 # 15 is not typo + ?vperm v2,v1,v2,v3 # outmask + le?vxor v3,v3,v4 + lvx v1,0,$out # outhead + vperm v0,v0,v0,v3 # rotate [and byte swap in LE] + vsel v1,v1,v0,v2 + lvx v4,$idx,$out + stvx v1,0,$out + vsel v0,v0,v4,v2 + stvx v0,$idx,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,3,0 + .long 0 +.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt +___ +} +&gen_block("en"); +&gen_block("de"); +}}} +######################################################################### +{{{ # CBC en- and decrypt procedures # +my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= + map("v$_",(4..10)); +$code.=<<___; +.globl .${prefix}_cbc_encrypt +.align 5 +.${prefix}_cbc_encrypt: + ${UCMP}i $len,16 + bltlr- + + cmpwi $enc,0 # test direction + lis r0,0xffe0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + beq Lcbc_dec + +Lcbc_enc: + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $inout,$inout,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + vxor $inout,$inout,$ivec + +Loop_cbc_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $ivec,$inout,$rndkey0 + ${UCMP}i $len,16 + + vperm $tmp,$ivec,$ivec,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_enc + + b Lcbc_done + +.align 4 +Lcbc_dec: + ${UCMP}i $len,128 + bge _aesp8_cbc_decrypt8x + vmr $tmp,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + mtctr $rounds + subi $len,$len,16 # len-=16 + + lvx $rndkey0,0,$key + vperm $tmp,$tmp,$inptail,$inpperm + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$tmp,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + +Loop_cbc_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_cbc_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipherlast $inout,$inout,$rndkey0 + ${UCMP}i $len,16 + + vxor $inout,$inout,$ivec + vmr $ivec,$tmp + vperm $tmp,$inout,$inout,$outperm + vsel $inout,$outhead,$tmp,$outmask + vmr $outhead,$tmp + stvx $inout,0,$out + addi $out,$out,16 + bge Lcbc_dec + +Lcbc_done: + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + neg $enc,$ivp # write [unaligned] iv + li $idx,15 # 15 is not typo + vxor $rndkey0,$rndkey0,$rndkey0 + vspltisb $outmask,-1 + le?vspltisb $tmp,0x0f + ?lvsl $outperm,0,$enc + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + lvx $outhead,0,$ivp + vperm $ivec,$ivec,$ivec,$outperm + vsel $inout,$outhead,$ivec,$outmask + lvx $inptail,$idx,$ivp + stvx $inout,0,$ivp + vsel $inout,$ivec,$inptail,$outmask + stvx $inout,$idx,$ivp + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CBC decrypt procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment + +$code.=<<___; +.align 5 +_aesp8_cbc_decrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + subi $len,$len,128 # bias + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_cbc_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_cbc_dec_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + #lvx $inptail,0,$inp # "caller" already did this + #addi $inp,$inp,15 # 15 is not typo + subi $inp,$inp,15 # undo "caller" + + le?li $idx,8 + lvx_u $in0,$x00,$inp # load first 8 "words" + le?lvsl $inpperm,0,$idx + le?vspltisb $tmp,0x0f + lvx_u $in1,$x10,$inp + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + lvx_u $in2,$x20,$inp + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in3,$x30,$inp + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in4,$x40,$inp + le?vperm $in2,$in2,$in2,$inpperm + vxor $out0,$in0,$rndkey0 + lvx_u $in5,$x50,$inp + le?vperm $in3,$in3,$in3,$inpperm + vxor $out1,$in1,$rndkey0 + lvx_u $in6,$x60,$inp + le?vperm $in4,$in4,$in4,$inpperm + vxor $out2,$in2,$rndkey0 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + le?vperm $in5,$in5,$in5,$inpperm + vxor $out3,$in3,$rndkey0 + le?vperm $in6,$in6,$in6,$inpperm + vxor $out4,$in4,$rndkey0 + le?vperm $in7,$in7,$in7,$inpperm + vxor $out5,$in5,$rndkey0 + vxor $out6,$in6,$rndkey0 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + b Loop_cbc_dec8x +.align 5 +Loop_cbc_dec8x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x + + subic $len,$len,128 # $len-=128 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + and r0,r0,$len + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vncipher $out0,$out0,v30 + vxor $ivec,$ivec,v31 # xor with last round key + vncipher $out1,$out1,v30 + vxor $in0,$in0,v31 + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + vncipherlast $out0,$out0,$ivec + vncipherlast $out1,$out1,$in0 + lvx_u $in0,$x00,$inp # load next input block + vncipherlast $out2,$out2,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out3,$out3,$in2 + le?vperm $in0,$in0,$in0,$inpperm + lvx_u $in2,$x20,$inp + vncipherlast $out4,$out4,$in3 + le?vperm $in1,$in1,$in1,$inpperm + lvx_u $in3,$x30,$inp + vncipherlast $out5,$out5,$in4 + le?vperm $in2,$in2,$in2,$inpperm + lvx_u $in4,$x40,$inp + vncipherlast $out6,$out6,$in5 + le?vperm $in3,$in3,$in3,$inpperm + lvx_u $in5,$x50,$inp + vncipherlast $out7,$out7,$in6 + le?vperm $in4,$in4,$in4,$inpperm + lvx_u $in6,$x60,$inp + vmr $ivec,$in7 + le?vperm $in5,$in5,$in5,$inpperm + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $in6,$in6,$in6,$inpperm + vxor $out0,$in0,$rndkey0 + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $in7,$in7,$in7,$inpperm + vxor $out1,$in1,$rndkey0 + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$rndkey0 + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$rndkey0 + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$rndkey0 + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + vxor $out5,$in5,$rndkey0 + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + vxor $out6,$in6,$rndkey0 + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + vxor $out7,$in7,$rndkey0 + + mtctr $rounds + beq Loop_cbc_dec8x # did $len-=128 borrow? + + addic. $len,$len,128 + beq Lcbc_dec8x_done + nop + nop + +Loop_cbc_dec8x_tail: # up to 7 "words" tail... + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_cbc_dec8x_tail + + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + vncipher $out6,$out6,v24 + vncipher $out7,$out7,v24 + + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + vncipher $out6,$out6,v25 + vncipher $out7,$out7,v25 + + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + vncipher $out6,$out6,v26 + vncipher $out7,$out7,v26 + + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + vncipher $out6,$out6,v27 + vncipher $out7,$out7,v27 + + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + vncipher $out6,$out6,v28 + vncipher $out7,$out7,v28 + + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + vncipher $out6,$out6,v29 + vncipher $out7,$out7,v29 + + vncipher $out1,$out1,v30 + vxor $ivec,$ivec,v31 # last round key + vncipher $out2,$out2,v30 + vxor $in1,$in1,v31 + vncipher $out3,$out3,v30 + vxor $in2,$in2,v31 + vncipher $out4,$out4,v30 + vxor $in3,$in3,v31 + vncipher $out5,$out5,v30 + vxor $in4,$in4,v31 + vncipher $out6,$out6,v30 + vxor $in5,$in5,v31 + vncipher $out7,$out7,v30 + vxor $in6,$in6,v31 + + cmplwi $len,32 # switch($len) + blt Lcbc_dec8x_one + nop + beq Lcbc_dec8x_two + cmplwi $len,64 + blt Lcbc_dec8x_three + nop + beq Lcbc_dec8x_four + cmplwi $len,96 + blt Lcbc_dec8x_five + nop + beq Lcbc_dec8x_six + +Lcbc_dec8x_seven: + vncipherlast $out1,$out1,$ivec + vncipherlast $out2,$out2,$in1 + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out1,$out1,$out1,$inpperm + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x00,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x10,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x20,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x30,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x40,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x50,$out + stvx_u $out7,$x60,$out + addi $out,$out,0x70 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_six: + vncipherlast $out2,$out2,$ivec + vncipherlast $out3,$out3,$in2 + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out2,$out2,$out2,$inpperm + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x00,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x10,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x20,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x30,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x40,$out + stvx_u $out7,$x50,$out + addi $out,$out,0x60 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_five: + vncipherlast $out3,$out3,$ivec + vncipherlast $out4,$out4,$in3 + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out3,$out3,$out3,$inpperm + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x00,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x10,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x20,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x30,$out + stvx_u $out7,$x40,$out + addi $out,$out,0x50 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_four: + vncipherlast $out4,$out4,$ivec + vncipherlast $out5,$out5,$in4 + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out4,$out4,$out4,$inpperm + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x00,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x10,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x20,$out + stvx_u $out7,$x30,$out + addi $out,$out,0x40 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_three: + vncipherlast $out5,$out5,$ivec + vncipherlast $out6,$out6,$in5 + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out5,$out5,$out5,$inpperm + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x00,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x10,$out + stvx_u $out7,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_two: + vncipherlast $out6,$out6,$ivec + vncipherlast $out7,$out7,$in6 + vmr $ivec,$in7 + + le?vperm $out6,$out6,$out6,$inpperm + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x00,$out + stvx_u $out7,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lcbc_dec8x_one: + vncipherlast $out7,$out7,$ivec + vmr $ivec,$in7 + + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out7,0,$out + addi $out,$out,0x10 + +Lcbc_dec8x_done: + le?vperm $ivec,$ivec,$ivec,$inpperm + stvx_u $ivec,0,$ivp # write [unaligned] iv + + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x14,0,0x80,6,6,0 + .long 0 +.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt +___ +}} }}} + +######################################################################### +{{{ # CTR procedure[s] # +my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); +my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= + map("v$_",(4..11)); +my $dat=$tmp; + +$code.=<<___; +.globl .${prefix}_ctr32_encrypt_blocks +.align 5 +.${prefix}_ctr32_encrypt_blocks: + ${UCMP}i $len,1 + bltlr- + + lis r0,0xfff0 + mfspr $vrsave,256 + mtspr 256,r0 + + li $idx,15 + vxor $rndkey0,$rndkey0,$rndkey0 + le?vspltisb $tmp,0x0f + + lvx $ivec,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + vspltisb $one,1 + le?vxor $inpperm,$inpperm,$tmp + vperm $ivec,$ivec,$inptail,$inpperm + vsldoi $one,$rndkey0,$one,1 + + neg r11,$inp + ?lvsl $keyperm,0,$key # prepare for unaligned key + lwz $rounds,240($key) + + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inptail,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + srwi $rounds,$rounds,1 + li $idx,16 + subi $rounds,$rounds,1 + + ${UCMP}i $len,8 + bge _aesp8_ctr32_encrypt8x + + ?lvsr $outperm,0,$out # prepare for unaligned store + vspltisb $outmask,-1 + lvx $outhead,0,$out + ?vperm $outmask,$rndkey0,$outmask,$outperm + le?vxor $outperm,$outperm,$tmp + + lvx $rndkey0,0,$key + mtctr $rounds + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + b Loop_ctr32_enc + +.align 5 +Loop_ctr32_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + bdnz Loop_ctr32_enc + + vadduwm $ivec,$ivec,$one + vmr $dat,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + subic. $len,$len,1 # blocks-- + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key + vperm $dat,$dat,$inptail,$inpperm + li $idx,16 + ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm + lvx $rndkey0,0,$key + vxor $dat,$dat,$rndkey1 # last round key + vcipherlast $inout,$inout,$dat + + lvx $rndkey1,$idx,$key + addi $idx,$idx,16 + vperm $inout,$inout,$inout,$outperm + vsel $dat,$outhead,$inout,$outmask + mtctr $rounds + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vmr $outhead,$inout + vxor $inout,$ivec,$rndkey0 + lvx $rndkey0,$idx,$key + addi $idx,$idx,16 + stvx $dat,0,$out + addi $out,$out,16 + bne Loop_ctr32_enc + + addi $out,$out,-1 + lvx $inout,0,$out # redundant in aligned case + vsel $inout,$outhead,$inout,$outmask + stvx $inout,0,$out + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,6,0 + .long 0 +___ +######################################################################### +{{ # Optimized CTR procedure # +my $key_="r11"; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); +my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); +my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment +my ($two,$three,$four)=($outhead,$outperm,$outmask); + +$code.=<<___; +.align 5 +_aesp8_ctr32_encrypt8x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key # load key schedule + lvx v30,$x10,$key + addi $key,$key,0x20 + lvx v31,$x00,$key + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_ctr32_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key + addi $key,$key,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_ctr32_enc_key + + lvx v26,$x10,$key + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key + ?vperm v29,v29,v30,$keyperm + lvx $out0,$x70,$key # borrow $out0 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$out0,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vadduwm $two,$one,$one + subi $inp,$inp,15 # undo "caller" + $SHL $len,$len,4 + + vadduwm $out1,$ivec,$one # counter values ... + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + le?li $idx,8 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + le?lvsl $inpperm,0,$idx + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + le?vspltisb $tmp,0x0f + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + vxor $out7,$out7,$rndkey0 + + mtctr $rounds + b Loop_ctr32_enc8x +.align 5 +Loop_ctr32_enc8x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 +Loop_ctr32_enc8x_middle: + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_ctr32_enc8x + + subic r11,$len,256 # $len-256, borrow $key_ + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + vcipher $out6,$out6,v24 + vcipher $out7,$out7,v24 + + subfe r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + vcipher $out6,$out6,v25 + vcipher $out7,$out7,v25 + + and r0,r0,r11 + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + vcipher $out6,$out6,v26 + vcipher $out7,$out7,v26 + lvx v24,$x00,$key_ # re-pre-load round[1] + + subic $len,$len,129 # $len-=129 + vcipher $out0,$out0,v27 + addi $len,$len,1 # $len-=128 really + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + vcipher $out6,$out6,v27 + vcipher $out7,$out7,v27 + lvx v25,$x10,$key_ # re-pre-load round[2] + + vcipher $out0,$out0,v28 + lvx_u $in0,$x00,$inp # load input + vcipher $out1,$out1,v28 + lvx_u $in1,$x10,$inp + vcipher $out2,$out2,v28 + lvx_u $in2,$x20,$inp + vcipher $out3,$out3,v28 + lvx_u $in3,$x30,$inp + vcipher $out4,$out4,v28 + lvx_u $in4,$x40,$inp + vcipher $out5,$out5,v28 + lvx_u $in5,$x50,$inp + vcipher $out6,$out6,v28 + lvx_u $in6,$x60,$inp + vcipher $out7,$out7,v28 + lvx_u $in7,$x70,$inp + addi $inp,$inp,0x80 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v29 + le?vperm $in1,$in1,$in1,$inpperm + vcipher $out2,$out2,v29 + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out3,$out3,v29 + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out4,$out4,v29 + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out5,$out5,v29 + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out6,$out6,v29 + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out7,$out7,v29 + le?vperm $in7,$in7,$in7,$inpperm + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in7 are loaded + # with last "words" + subfe. r0,r0,r0 # borrow?-1:0 + vcipher $out0,$out0,v30 + vxor $in0,$in0,v31 # xor with last round key + vcipher $out1,$out1,v30 + vxor $in1,$in1,v31 + vcipher $out2,$out2,v30 + vxor $in2,$in2,v31 + vcipher $out3,$out3,v30 + vxor $in3,$in3,v31 + vcipher $out4,$out4,v30 + vxor $in4,$in4,v31 + vcipher $out5,$out5,v30 + vxor $in5,$in5,v31 + vcipher $out6,$out6,v30 + vxor $in6,$in6,v31 + vcipher $out7,$out7,v30 + vxor $in7,$in7,v31 + + bne Lctr32_enc8x_break # did $len-129 borrow? + + vcipherlast $in0,$out0,$in0 + vcipherlast $in1,$out1,$in1 + vadduwm $out1,$ivec,$one # counter values ... + vcipherlast $in2,$out2,$in2 + vadduwm $out2,$ivec,$two + vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] + vcipherlast $in3,$out3,$in3 + vadduwm $out3,$out1,$two + vxor $out1,$out1,$rndkey0 + vcipherlast $in4,$out4,$in4 + vadduwm $out4,$out2,$two + vxor $out2,$out2,$rndkey0 + vcipherlast $in5,$out5,$in5 + vadduwm $out5,$out3,$two + vxor $out3,$out3,$rndkey0 + vcipherlast $in6,$out6,$in6 + vadduwm $out6,$out4,$two + vxor $out4,$out4,$rndkey0 + vcipherlast $in7,$out7,$in7 + vadduwm $out7,$out5,$two + vxor $out5,$out5,$rndkey0 + le?vperm $in0,$in0,$in0,$inpperm + vadduwm $ivec,$out6,$two # next counter value + vxor $out6,$out6,$rndkey0 + le?vperm $in1,$in1,$in1,$inpperm + vxor $out7,$out7,$rndkey0 + mtctr $rounds + + vcipher $out0,$out0,v24 + stvx_u $in0,$x00,$out + le?vperm $in2,$in2,$in2,$inpperm + vcipher $out1,$out1,v24 + stvx_u $in1,$x10,$out + le?vperm $in3,$in3,$in3,$inpperm + vcipher $out2,$out2,v24 + stvx_u $in2,$x20,$out + le?vperm $in4,$in4,$in4,$inpperm + vcipher $out3,$out3,v24 + stvx_u $in3,$x30,$out + le?vperm $in5,$in5,$in5,$inpperm + vcipher $out4,$out4,v24 + stvx_u $in4,$x40,$out + le?vperm $in6,$in6,$in6,$inpperm + vcipher $out5,$out5,v24 + stvx_u $in5,$x50,$out + le?vperm $in7,$in7,$in7,$inpperm + vcipher $out6,$out6,v24 + stvx_u $in6,$x60,$out + vcipher $out7,$out7,v24 + stvx_u $in7,$x70,$out + addi $out,$out,0x80 + + b Loop_ctr32_enc8x_middle + +.align 5 +Lctr32_enc8x_break: + cmpwi $len,-0x60 + blt Lctr32_enc8x_one + nop + beq Lctr32_enc8x_two + cmpwi $len,-0x40 + blt Lctr32_enc8x_three + nop + beq Lctr32_enc8x_four + cmpwi $len,-0x20 + blt Lctr32_enc8x_five + nop + beq Lctr32_enc8x_six + cmpwi $len,0x00 + blt Lctr32_enc8x_seven + +Lctr32_enc8x_eight: + vcipherlast $out0,$out0,$in0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + vcipherlast $out5,$out5,$in5 + vcipherlast $out6,$out6,$in6 + vcipherlast $out7,$out7,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + le?vperm $out7,$out7,$out7,$inpperm + stvx_u $out6,$x60,$out + stvx_u $out7,$x70,$out + addi $out,$out,0x80 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_seven: + vcipherlast $out0,$out0,$in1 + vcipherlast $out1,$out1,$in2 + vcipherlast $out2,$out2,$in3 + vcipherlast $out3,$out3,$in4 + vcipherlast $out4,$out4,$in5 + vcipherlast $out5,$out5,$in6 + vcipherlast $out6,$out6,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + le?vperm $out6,$out6,$out6,$inpperm + stvx_u $out5,$x50,$out + stvx_u $out6,$x60,$out + addi $out,$out,0x70 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_six: + vcipherlast $out0,$out0,$in2 + vcipherlast $out1,$out1,$in3 + vcipherlast $out2,$out2,$in4 + vcipherlast $out3,$out3,$in5 + vcipherlast $out4,$out4,$in6 + vcipherlast $out5,$out5,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + le?vperm $out5,$out5,$out5,$inpperm + stvx_u $out4,$x40,$out + stvx_u $out5,$x50,$out + addi $out,$out,0x60 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_five: + vcipherlast $out0,$out0,$in3 + vcipherlast $out1,$out1,$in4 + vcipherlast $out2,$out2,$in5 + vcipherlast $out3,$out3,$in6 + vcipherlast $out4,$out4,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$inpperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_four: + vcipherlast $out0,$out0,$in4 + vcipherlast $out1,$out1,$in5 + vcipherlast $out2,$out2,$in6 + vcipherlast $out3,$out3,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$inpperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + b Lctr32_enc8x_done + +.align 5 +Lctr32_enc8x_three: + vcipherlast $out0,$out0,$in5 + vcipherlast $out1,$out1,$in6 + vcipherlast $out2,$out2,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + le?vperm $out2,$out2,$out2,$inpperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + b Lcbc_dec8x_done + +.align 5 +Lctr32_enc8x_two: + vcipherlast $out0,$out0,$in6 + vcipherlast $out1,$out1,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + le?vperm $out1,$out1,$out1,$inpperm + stvx_u $out0,$x00,$out + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + b Lcbc_dec8x_done + +.align 5 +Lctr32_enc8x_one: + vcipherlast $out0,$out0,$in7 + + le?vperm $out0,$out0,$out0,$inpperm + stvx_u $out0,0,$out + addi $out,$out,0x10 + +Lctr32_enc8x_done: + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $inpperm,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + stvx $inpperm,r10,$sp + addi r10,r10,32 + stvx $inpperm,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x14,0,0x80,6,6,0 + .long 0 +.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks +___ +}} }}} + +my $consts=1; +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/geo; + + # constants table endian-specific conversion + if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { + my $conv=$3; + my @bytes=(); + + # convert to endian-agnostic format + if ($1 eq "long") { + foreach (split(/,\s*/,$2)) { + my $l = /^0/?oct:int; + push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; + } + } else { + @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); + } + + # little-endian conversion + if ($flavour =~ /le$/o) { + SWITCH: for($conv) { + /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; + } + } + + #emit + print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; + next; + } + $consts=0 if (m/Lconsts:/o); # end of table + + # instructions prefixed with '?' are endian-specific and need + # to be adjusted accordingly... + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o or + s/\?lvsr/lvsl/o or + s/\?lvsl/lvsr/o or + s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or + s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or + s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; + } else { # big-endian + s/le\?/#le#/o or + s/be\?//o or + s/\?([a-z]+)/$1/o; + } + + print $_,"\n"; +} + +close STDOUT; diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index ae1fd0a2ed9f9..ef76da93a4594 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -155,9 +155,18 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len, const unsigned char iv[16]); #endif -#if defined(VPAES_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) -extern unsigned int OPENSSL_ppccap_P; -#define VPAES_CAPABLE (OPENSSL_ppccap_P&(1<<1)) +#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# include "ppc_arch.h" +# ifdef VPAES_ASM +# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC) +# endif +# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207) +# define HWAES_set_encrypt_key aes_p8_set_encrypt_key +# define HWAES_set_decrypt_key aes_p8_set_decrypt_key +# define HWAES_encrypt aes_p8_encrypt +# define HWAES_decrypt aes_p8_decrypt +# define HWAES_cbc_encrypt aes_p8_cbc_encrypt +# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks #endif #if defined(AES_ASM) && !defined(I386_ONLY) && ( \ diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile index e68aef611a5ea..7327b51b9e13a 100644 --- a/crypto/modes/Makefile +++ b/crypto/modes/Makefile @@ -62,6 +62,8 @@ ghash-parisc.s: asm/ghash-parisc.pl $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@ ghashv8-armx.S: asm/ghashv8-armx.pl $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@ +ghashp8-ppc.s: asm/ghashp8-ppc.pl + $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@ # GNU make "catch all" ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@ diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl new file mode 100755 index 0000000000000..e76a58c343c1c --- /dev/null +++ b/crypto/modes/asm/ghashp8-ppc.pl @@ -0,0 +1,234 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# GHASH for for PowerISA v2.07. +# +# July 2014 +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This initial +# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x +# faster than "4-bit" integer-only compiler-generated 64-bit code. +# "Initial version" means that there is room for futher improvement. + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block + +my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); +my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); +my $vrsave="r12"; + +$code=<<___; +.machine "any" + +.text + +.globl .gcm_init_p8 +.align 5 +.gcm_init_p8: + lis r0,0xfff0 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $H,0,r4 # load H + + vspltisb $xC2,-16 # 0xf0 + vspltisb $t0,1 # one + vaddubm $xC2,$xC2,$xC2 # 0xe0 + vxor $zero,$zero,$zero + vor $xC2,$xC2,$t0 # 0xe1 + vsldoi $xC2,$xC2,$zero,15 # 0xe1... + vsldoi $t1,$zero,$t0,1 # ...1 + vaddubm $xC2,$xC2,$xC2 # 0xc2... + vspltisb $t2,7 + vor $xC2,$xC2,$t1 # 0xc2....01 + vspltb $t1,$H,0 # most significant byte + vsl $H,$H,$t0 # H<<=1 + vsrab $t1,$t1,$t2 # broadcast carry bit + vand $t1,$t1,$xC2 + vxor $H,$H,$t1 # twisted H + + vsldoi $H,$H,$H,8 # twist even more ... + vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 + vsldoi $Hl,$zero,$H,8 # ... and split + vsldoi $Hh,$H,$zero,8 + + stvx_u $xC2,0,r3 # save pre-computed table + stvx_u $Hl,r8,r3 + stvx_u $H, r9,r3 + stvx_u $Hh,r10,r3 + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_init_p8,.-.gcm_init_p8 + +.globl .gcm_gmult_p8 +.align 5 +.gcm_gmult_p8: + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $IN,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $IN,$IN,$IN,$lemask + vxor $zero,$zero,$zero + + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + + vpmsumd $t2,$Xl,$xC2 # 1st phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + + vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vpmsumd $Xl,$Xl,$xC2 + vxor $t1,$t1,$Xh + vxor $Xl,$Xl,$t1 + + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 +.size .gcm_gmult_p8,.-.gcm_gmult_p8 + +.globl .gcm_ghash_p8 +.align 5 +.gcm_ghash_p8: + lis r0,0xfff8 + li r8,0x10 + mfspr $vrsave,256 + li r9,0x20 + mtspr 256,r0 + li r10,0x30 + lvx_u $Xl,0,$Xip # load Xi + + lvx_u $Hl,r8,$Htbl # load pre-computed table + le?lvsl $lemask,r0,r0 + lvx_u $H, r9,$Htbl + le?vspltisb $t0,0x07 + lvx_u $Hh,r10,$Htbl + le?vxor $lemask,$lemask,$t0 + lvx_u $xC2,0,$Htbl + le?vperm $Xl,$Xl,$Xl,$lemask + vxor $zero,$zero,$zero + + lvx_u $IN,0,$inp + addi $inp,$inp,16 + subi $len,$len,16 + le?vperm $IN,$IN,$IN,$lemask + vxor $IN,$IN,$Xl + b Loop + +.align 5 +Loop: + subic $len,$len,16 + vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo + subfe. r0,r0,r0 # borrow?-1:0 + vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi + and r0,r0,$len + vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi + add $inp,$inp,r0 + + vpmsumd $t2,$Xl,$xC2 # 1st phase + + vsldoi $t0,$Xm,$zero,8 + vsldoi $t1,$zero,$Xm,8 + vxor $Xl,$Xl,$t0 + vxor $Xh,$Xh,$t1 + + vsldoi $Xl,$Xl,$Xl,8 + vxor $Xl,$Xl,$t2 + lvx_u $IN,0,$inp + addi $inp,$inp,16 + + vsldoi $t1,$Xl,$Xl,8 # 2nd phase + vpmsumd $Xl,$Xl,$xC2 + le?vperm $IN,$IN,$IN,$lemask + vxor $t1,$t1,$Xh + vxor $IN,$IN,$t1 + vxor $IN,$IN,$Xl + beq Loop # did $len-=16 borrow? + + vxor $Xl,$Xl,$t1 + le?vperm $Xl,$Xl,$Xl,$lemask + stvx_u $Xl,0,$Xip # write out Xi + + mtspr 256,$vrsave + blr + .long 0 + .byte 0,12,0x14,0,0,0,4,0 + .long 0 +.size .gcm_ghash_p8,.-.gcm_ghash_p8 + +.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +foreach (split("\n",$code)) { + if ($flavour =~ /le$/o) { # little-endian + s/le\?//o or + s/be\?/#be#/o; + } else { + s/le\?/#le#/o or + s/be\?//o; + } + print $_,"\n"; +} + +close STDOUT; # enforce flush diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index f5d45212c95d6..878c9930db5ce 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -697,6 +697,13 @@ extern unsigned int OPENSSL_sparcv9cap_P[]; void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]); void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]); void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC)) +# include "ppc_arch.h" +# define GHASH_ASM_PPC +# define GCM_FUNCREF_4BIT +void gcm_init_p8(u128 Htable[16],const u64 Xi[2]); +void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]); +void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); # endif #endif @@ -800,6 +807,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) ctx->gmult = gcm_gmult_4bit; ctx->ghash = gcm_ghash_4bit; } +# elif defined(GHASH_ASM_PPC) + if (OPENSSL_ppccap_P & PPC_CRYPTO207) { + gcm_init_p8(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_p8; + ctx->ghash = gcm_ghash_p8; + } else { + gcm_init_4bit(ctx->Htable,ctx->H.u); + ctx->gmult = gcm_gmult_4bit; + ctx->ghash = gcm_ghash_4bit; + } # else gcm_init_4bit(ctx->Htable,ctx->H.u); # endif diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl index a67eef57d5b6d..f89e814299316 100755 --- a/crypto/perlasm/ppc-xlate.pl +++ b/crypto/perlasm/ppc-xlate.pl @@ -151,6 +151,42 @@ " vor $vx,$vy,$vy"; }; +# PowerISA 2.06 stuff +sub vsxmem_op { + my ($f, $vrt, $ra, $rb, $op) = @_; + " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); +} +# made-up unaligned memory reference AltiVec/VMX instructions +my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x +my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x +my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx +my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx +my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x +my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x + +# PowerISA 2.07 stuff +sub vcrypto_op { + my ($f, $vrt, $vra, $vrb, $op) = @_; + " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; +} +my $vcipher = sub { vcrypto_op(@_, 1288); }; +my $vcipherlast = sub { vcrypto_op(@_, 1289); }; +my $vncipher = sub { vcrypto_op(@_, 1352); }; +my $vncipherlast= sub { vcrypto_op(@_, 1353); }; +my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; +my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; +my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; +my $vpmsumb = sub { vcrypto_op(@_, 1032); }; +my $vpmsumd = sub { vcrypto_op(@_, 1224); }; +my $vpmsubh = sub { vcrypto_op(@_, 1096); }; +my $vpmsumw = sub { vcrypto_op(@_, 1160); }; +my $vaddudm = sub { vcrypto_op(@_, 192); }; + +my $mtsle = sub { + my ($f, $arg) = @_; + " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); +}; + while($line=<>) { $line =~ s|[#!;].*$||; # get rid of asm-style comments... diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h new file mode 100644 index 0000000000000..1192edfa2d4bf --- /dev/null +++ b/crypto/ppc_arch.h @@ -0,0 +1,10 @@ +#ifndef __PPC_ARCH_H__ +#define __PPC_ARCH_H__ + +extern unsigned int OPENSSL_ppccap_P; + +#define PPC_FPU64 (1<<0) +#define PPC_ALTIVEC (1<<1) +#define PPC_CRYPTO207 (1<<2) + +#endif diff --git a/crypto/ppccap.c b/crypto/ppccap.c index d78bfbcdbb882..13c2ca51627cb 100644 --- a/crypto/ppccap.c +++ b/crypto/ppccap.c @@ -10,8 +10,7 @@ #include #include -#define PPC_FPU64 (1<<0) -#define PPC_ALTIVEC (1<<1) +#include "ppc_arch.h" unsigned int OPENSSL_ppccap_P = 0; @@ -53,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U } #endif +void sha256_block_p8(void *ctx,const void *inp,size_t len); +void sha256_block_ppc(void *ctx,const void *inp,size_t len); +void sha256_block_data_order(void *ctx,const void *inp,size_t len) + { + OPENSSL_ppccap_P&PPC_CRYPTO207? sha256_block_p8(ctx,inp,len): + sha256_block_ppc(ctx,inp,len); + } + +void sha512_block_p8(void *ctx,const void *inp,size_t len); +void sha512_block_ppc(void *ctx,const void *inp,size_t len); +void sha512_block_data_order(void *ctx,const void *inp,size_t len) + { + OPENSSL_ppccap_P&PPC_CRYPTO207? sha512_block_p8(ctx,inp,len): + sha512_block_ppc(ctx,inp,len); + } + static sigjmp_buf ill_jmp; static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } void OPENSSL_ppc64_probe(void); void OPENSSL_altivec_probe(void); +void OPENSSL_crypto207_probe(void); void OPENSSL_cpuid_setup(void) { @@ -128,6 +144,11 @@ void OPENSSL_cpuid_setup(void) { OPENSSL_altivec_probe(); OPENSSL_ppccap_P |= PPC_ALTIVEC; + if (sigsetjmp(ill_jmp,1) == 0) + { + OPENSSL_crypto207_probe(); + OPENSSL_ppccap_P |= PPC_CRYPTO207; + } } sigaction (SIGILL,&ill_oact,NULL); diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl index 54da0286e915e..8d800fe7d36fa 100755 --- a/crypto/ppccpuid.pl +++ b/crypto/ppccpuid.pl @@ -42,6 +42,16 @@ .byte 0,12,0x14,0,0,0,0,0 .size .OPENSSL_altivec_probe,.-..OPENSSL_altivec_probe +.globl .OPENSSL_crypto207_probe +.align 4 +.OPENSSL_crypto207_probe: + lvx_u v0,0,r1 + vcipher v0,v0,v0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe + .globl .OPENSSL_wipe_cpu .align 4 .OPENSSL_wipe_cpu: diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile index 50734bf4a7723..a7fac4683ea5e 100644 --- a/crypto/sha/Makefile +++ b/crypto/sha/Makefile @@ -77,6 +77,8 @@ sha512-sparcv9.S:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAG sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@ +sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ +sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@ sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@ sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@ diff --git a/crypto/sha/asm/sha512-ppc.pl b/crypto/sha/asm/sha512-ppc.pl index 5c3ac2c095d2c..734f3c1ca0f09 100755 --- a/crypto/sha/asm/sha512-ppc.pl +++ b/crypto/sha/asm/sha512-ppc.pl @@ -65,7 +65,7 @@ open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; if ($output =~ /512/) { - $func="sha512_block_data_order"; + $func="sha512_block_ppc"; $SZ=8; @Sigma0=(28,34,39); @Sigma1=(14,18,41); @@ -77,7 +77,7 @@ $ROR="rotrdi"; $SHR="srdi"; } else { - $func="sha256_block_data_order"; + $func="sha256_block_ppc"; $SZ=4; @Sigma0=( 2,13,22); @Sigma1=( 6,11,25); diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl new file mode 100755 index 0000000000000..cb0268c9d6184 --- /dev/null +++ b/crypto/sha/asm/sha512p8-ppc.pl @@ -0,0 +1,423 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA256/512 for PowerISA v2.07. +# +# Accurate performance measurements are problematic, because it's +# always virtualized setup with possibly throttled processor. +# Relative comparison is therefore more informative. This module is +# ~60% faster than integer-only sha512-ppc.pl. To anchor to something +# else, SHA256 is 16% slower than sha1-ppc.pl and 2.5x slower than +# hardware-assisted aes-128-cbc encrypt. SHA512 is 33% faster than +# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting +# result is degree of computational resources' utilization. POWER8 is +# "massively multi-threaded chip" and difference between single- and +# maximum multi-process benchmark results tells that utlization is +# whooping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and +# for sha1-ppc.pl - 73%. 100% means that multi-process result equals +# to single-process one, given that all threads end up on the same +# physical core. + +$flavour=shift; +$output =shift; + +if ($flavour =~ /64/) { + $SIZE_T=8; + $LRSAVE=2*$SIZE_T; + $STU="stdu"; + $POP="ld"; + $PUSH="std"; +} elsif ($flavour =~ /32/) { + $SIZE_T=4; + $LRSAVE=$SIZE_T; + $STU="stwu"; + $POP="lwz"; + $PUSH="stw"; +} else { die "nonsense $flavour"; } + +$LENDIAN=($flavour=~/le/); + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; + +open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + +if ($output =~ /512/) { + $bits=512; + $SZ=8; + $sz="d"; + $rounds=80; +} else { + $bits=256; + $SZ=4; + $sz="w"; + $rounds=64; +} + +$func="sha${bits}_block_p8"; +$FRAME=8*$SIZE_T; + +$sp ="r1"; +$toc="r2"; +$ctx="r3"; +$inp="r4"; +$num="r5"; +$Tbl="r6"; +$idx="r7"; +$lrsave="r8"; +$offload="r11"; +$vrsave="r12"; +($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31)); + +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); +@X=map("v$_",(8..23)); +($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31)); + +sub ROUND { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)%16; + +$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); + lvx_u @X[$i+1],0,$inp ; load X[i] in advance + addi $inp,$inp,16 +___ +$code.=<<___ if ($i<16 && ($i%(16/$SZ))); + vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ +___ +$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); + vperm @X[$i],@X[$i],@X[$i],$lemask +___ +$code.=<<___; + `"vshasigma${sz} $s0,@X[($j+1)%16],0,0" if ($i>=15)` + vsel $Func,$g,$f,$e ; Ch(e,f,g) + vshasigma${sz} $S1,$e,1,15 ; Sigma1(e) + vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] + vshasigma${sz} $S0,$a,1,0 ; Sigma0(a) + `"vshasigma${sz} $s1,@X[($j+14)%16],0,15" if ($i>=15)` + vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) + vxor $Func,$a,$b + `"vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]" if ($i>=15)` + vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e) + vsel $Func,$b,$c,$Func ; Maj(a,b,c) + vaddu${sz}m $g,$g,$Ki ; future h+=K[i] + vaddu${sz}m $d,$d,$h ; d+=h + vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c) + `"vaddu${sz}m @X[$j],@X[$j],$s0" if ($i>=15)` + lvx $Ki,$idx,$Tbl ; load next K[i] + addi $idx,$idx,16 + vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c) + `"vaddu${sz}m @X[$j],@X[$j],$s1" if ($i>=15)` +___ +} + +$code=<<___; +.machine "any" +.text + +.globl $func +.align 6 +$func: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr $lrsave + li r10,`$FRAME+8*16+15` + li r11,`$FRAME+8*16+31` + stvx v20,r10,$sp # ABI says so + addi r10,r10,32 + mfspr $vrsave,256 + stvx v21,r11,$sp + addi r11,r11,32 + stvx v22,r10,$sp + addi r10,r10,32 + stvx v23,r11,$sp + addi r11,r11,32 + stvx v24,r10,$sp + addi r10,r10,32 + stvx v25,r11,$sp + addi r11,r11,32 + stvx v26,r10,$sp + addi r10,r10,32 + stvx v27,r11,$sp + addi r11,r11,32 + stvx v28,r10,$sp + addi r10,r10,32 + stvx v29,r11,$sp + addi r11,r11,32 + stvx v30,r10,$sp + stvx v31,r11,$sp + li r11,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + mtspr 256,r11 + + bl LPICmeup + addi $offload,$sp,$FRAME+15 +___ +$code.=<<___ if ($LENDIAN); + li $idx,8 + lvsl $lemask,0,$idx + vspltisb $Ki,0x0f + vxor $lemask,$lemask,$Ki +___ +$code.=<<___ if ($SZ==4); + lvx_4w $A,$x00,$ctx + lvx_4w $E,$x10,$ctx + vsldoi $B,$A,$A,4 # unpack + vsldoi $C,$A,$A,8 + vsldoi $D,$A,$A,12 + vsldoi $F,$E,$E,4 + vsldoi $G,$E,$E,8 + vsldoi $H,$E,$E,12 +___ +$code.=<<___ if ($SZ==8); + lvx_u $A,$x00,$ctx + lvx_u $C,$x10,$ctx + lvx_u $E,$x20,$ctx + vsldoi $B,$A,$A,8 # unpack + lvx_u $G,$x30,$ctx + vsldoi $D,$C,$C,8 + vsldoi $F,$E,$E,8 + vsldoi $H,$G,$G,8 +___ +$code.=<<___; + li r0,`($rounds-16)/16` # inner loop counter + b Loop +.align 5 +Loop: + lvx $Ki,$x00,$Tbl + li $idx,16 + lvx_u @X[0],0,$inp + addi $inp,$inp,16 + stvx $A,$x00,$offload # offload $A-$H + stvx $B,$x10,$offload + stvx $C,$x20,$offload + stvx $D,$x30,$offload + stvx $E,$x40,$offload + stvx $F,$x50,$offload + stvx $G,$x60,$offload + stvx $H,$x70,$offload + vaddu${sz}m $H,$H,$Ki # h+K[i] + lvx $Ki,$idx,$Tbl + addi $idx,$idx,16 +___ +for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mtctr r0 + b L16_xx +.align 5 +L16_xx: +___ +for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bdnz L16_xx + + lvx @X[2],$x00,$offload + subic. $num,$num,1 + lvx @X[3],$x10,$offload + vaddu${sz}m $A,$A,@X[2] + lvx @X[4],$x20,$offload + vaddu${sz}m $B,$B,@X[3] + lvx @X[5],$x30,$offload + vaddu${sz}m $C,$C,@X[4] + lvx @X[6],$x40,$offload + vaddu${sz}m $D,$D,@X[5] + lvx @X[7],$x50,$offload + vaddu${sz}m $E,$E,@X[6] + lvx @X[8],$x60,$offload + vaddu${sz}m $F,$F,@X[7] + lvx @X[9],$x70,$offload + vaddu${sz}m $G,$G,@X[8] + vaddu${sz}m $H,$H,@X[9] + bne Loop +___ +$code.=<<___ if ($SZ==4); + lvx @X[0],$idx,$Tbl + addi $idx,$idx,16 + vperm $A,$A,$B,$Ki # pack the answer + lvx @X[1],$idx,$Tbl + vperm $E,$E,$F,$Ki + vperm $A,$A,$C,@X[0] + vperm $E,$E,$G,@X[0] + vperm $A,$A,$D,@X[1] + vperm $E,$E,$H,@X[1] + stvx_4w $A,$x00,$ctx + stvx_4w $E,$x10,$ctx +___ +$code.=<<___ if ($SZ==8); + vperm $A,$A,$B,$Ki # pack the answer + vperm $C,$C,$D,$Ki + vperm $E,$E,$F,$Ki + vperm $G,$G,$H,$Ki + stvx_u $A,$x00,$ctx + stvx_u $C,$x10,$ctx + stvx_u $E,$x20,$ctx + stvx_u $G,$x30,$ctx +___ +$code.=<<___; + li r10,`$FRAME+8*16+15` + mtlr $lrsave + li r11,`$FRAME+8*16+31` + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,4,1,0x80,6,3,0 + .long 0 +.size $func,.-$func +___ + +# Ugly hack here, because PPC assembler syntax seem to vary too +# much from platforms to platform... +$code.=<<___; +.align 6 +LPICmeup: + mflr r0 + bcl 20,31,\$+4 + mflr $Tbl ; vvvvvv "distance" between . and 1st data entry + addi $Tbl,$Tbl,`64-8` + mtlr r0 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + .space `64-9*4` +___ + +if ($SZ==8) { + local *table = sub { + foreach(@_) { $code.=".quad $_,$_\n"; } + }; + table( + "0x428a2f98d728ae22","0x7137449123ef65cd", + "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", + "0x3956c25bf348b538","0x59f111f1b605d019", + "0x923f82a4af194f9b","0xab1c5ed5da6d8118", + "0xd807aa98a3030242","0x12835b0145706fbe", + "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", + "0x72be5d74f27b896f","0x80deb1fe3b1696b1", + "0x9bdc06a725c71235","0xc19bf174cf692694", + "0xe49b69c19ef14ad2","0xefbe4786384f25e3", + "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", + "0x2de92c6f592b0275","0x4a7484aa6ea6e483", + "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", + "0x983e5152ee66dfab","0xa831c66d2db43210", + "0xb00327c898fb213f","0xbf597fc7beef0ee4", + "0xc6e00bf33da88fc2","0xd5a79147930aa725", + "0x06ca6351e003826f","0x142929670a0e6e70", + "0x27b70a8546d22ffc","0x2e1b21385c26c926", + "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", + "0x650a73548baf63de","0x766a0abb3c77b2a8", + "0x81c2c92e47edaee6","0x92722c851482353b", + "0xa2bfe8a14cf10364","0xa81a664bbc423001", + "0xc24b8b70d0f89791","0xc76c51a30654be30", + "0xd192e819d6ef5218","0xd69906245565a910", + "0xf40e35855771202a","0x106aa07032bbd1b8", + "0x19a4c116b8d2d0c8","0x1e376c085141ab53", + "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", + "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", + "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", + "0x748f82ee5defb2fc","0x78a5636f43172f60", + "0x84c87814a1f0ab72","0x8cc702081a6439ec", + "0x90befffa23631e28","0xa4506cebde82bde9", + "0xbef9a3f7b2c67915","0xc67178f2e372532b", + "0xca273eceea26619c","0xd186b8c721c0c207", + "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", + "0x06f067aa72176fba","0x0a637dc5a2c898a6", + "0x113f9804bef90dae","0x1b710b35131c471b", + "0x28db77f523047d84","0x32caab7b40c72493", + "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", + "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", + "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); +$code.=<<___ if (!$LENDIAN); +.quad 0x0001020304050607,0x1011121314151617 +___ +$code.=<<___ if ($LENDIAN); # quad-swapped +.quad 0x1011121314151617,0x0001020304050607 +___ +} else { + local *table = sub { + foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } + }; + table( + "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", + "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", + "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", + "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", + "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", + "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", + "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", + "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", + "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", + "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", + "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", + "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", + "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", + "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", + "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", + "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); +$code.=<<___ if (!$LENDIAN); +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 +___ +$code.=<<___ if ($LENDIAN); # word-swapped +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 0x10111213,0x08090a0b,0x04050607,0x00010203 +___ +} +$code.=<<___; +.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by " +.align 2 +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +print $code; +close STDOUT; From f284fc7cc39e89928f5d75297eaf21e37914fd97 Mon Sep 17 00:00:00 2001 From: "Dr. Stephen Henson" Date: Sun, 20 Jul 2014 12:39:18 +0100 Subject: [PATCH 18/19] Add test header to Makefile, update ordinals Reviewed-by: Tim Hudson --- test/Makefile | 2 +- util/libeay.num | 5 +++++ util/ssleay.num | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/test/Makefile b/test/Makefile index 773aee3fcab9f..9963a39645826 100644 --- a/test/Makefile +++ b/test/Makefile @@ -103,7 +103,7 @@ SRC= $(BNTEST).c $(ECTEST).c $(ECDSATEST).c $(ECDHTEST).c $(IDEATEST).c \ $(V3NAMETEST).c $(HEARTBEATTEST).c EXHEADER= -HEADER= $(EXHEADER) +HEADER= testutil.h $(EXHEADER) ALL= $(GENERAL) $(SRC) $(HEADER) diff --git a/util/libeay.num b/util/libeay.num index 19bdfee0e71c0..a1d68d2f30d8e 100755 --- a/util/libeay.num +++ b/util/libeay.num @@ -4404,3 +4404,8 @@ X509V3_EXT_free 4763 EXIST::FUNCTION: BIO_hex_string 4764 EXIST::FUNCTION: X509_VERIFY_PARAM_set_hostflags 4765 EXIST::FUNCTION: BUF_strnlen 4766 EXIST::FUNCTION: +X509_VERIFY_PARAM_get0_peername 4767 EXIST::FUNCTION: +ECDSA_METHOD_set_app_data 4768 EXIST::FUNCTION:ECDSA +sk_deep_copy 4769 EXIST::FUNCTION: +ECDSA_METHOD_get_app_data 4770 EXIST::FUNCTION:ECDSA +X509_VERIFY_PARAM_add1_host 4771 EXIST::FUNCTION: diff --git a/util/ssleay.num b/util/ssleay.num index 75e1cc155c89c..519a8db831219 100755 --- a/util/ssleay.num +++ b/util/ssleay.num @@ -328,7 +328,7 @@ DTLS_method 367 EXIST::FUNCTION: DTLS_client_method 368 EXIST::FUNCTION: SSL_CIPHER_standard_name 369 EXIST::FUNCTION:SSL_TRACE SSL_set_alpn_protos 370 EXIST::FUNCTION: -SSL_CTX_set_srv_supp_data 371 EXIST::FUNCTION:TLSEXT +SSL_CTX_set_srv_supp_data 371 NOEXIST::FUNCTION: SSL_CONF_cmd_argv 372 EXIST::FUNCTION: DTLSv1_2_server_method 373 EXIST::FUNCTION: SSL_COMP_set0_compression_methods 374 EXIST:!VMS:FUNCTION:COMP @@ -360,7 +360,7 @@ SSL_check_chain 399 EXIST::FUNCTION:TLSEXT SSL_certs_clear 400 EXIST::FUNCTION: SSL_CONF_CTX_free 401 EXIST::FUNCTION: SSL_trace 402 EXIST::FUNCTION:SSL_TRACE -SSL_CTX_set_cli_supp_data 403 EXIST::FUNCTION:TLSEXT +SSL_CTX_set_cli_supp_data 403 NOEXIST::FUNCTION: DTLSv1_2_method 404 EXIST::FUNCTION: DTLS_server_method 405 EXIST::FUNCTION: SSL_CTX_use_serverinfo_file 406 EXIST::FUNCTION:STDIO,TLSEXT From 2be9425514c25ff6450cf0a13acf99ddc1a2c35a Mon Sep 17 00:00:00 2001 From: Tim Hudson Date: Mon, 21 Jul 2014 20:03:50 +1000 Subject: [PATCH 19/19] Minor documentation update removing "really" and a statement of opinion rather than a fact. Reviewed-by: Dr. Stephen Henson Reviewed-by: Rich Salz (cherry picked from commit c8d133e4b6f1ed1b7ad3c1a6d2c62f460e26c050) --- doc/crypto/des.pod | 7 +++---- doc/crypto/ui.pod | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/crypto/des.pod b/doc/crypto/des.pod index 6f0cf1cc5e5f1..e1add56b5e813 100644 --- a/doc/crypto/des.pod +++ b/doc/crypto/des.pod @@ -135,9 +135,8 @@ depend on a global variable. DES_set_odd_parity() sets the parity of the passed I to odd. -DES_is_weak_key() returns 1 is the passed key is a weak key, 0 if it -is ok. The probability that a randomly generated key is weak is -1/2^52, so it is not really worth checking for them. +DES_is_weak_key() returns 1 if the passed key is a weak key, 0 if it +is ok. The following routines mostly operate on an input and output stream of Is. @@ -181,7 +180,7 @@ of 24 bytes. This is much better than CBC DES. DES_ede3_cbc_encrypt() implements outer triple CBC DES encryption with three keys. This means that each DES operation inside the CBC mode is -really an C. This mode is used by SSL. +an C. This mode is used by SSL. The DES_ede2_cbc_encrypt() macro implements two-key Triple-DES by reusing I for the final encryption. C. diff --git a/doc/crypto/ui.pod b/doc/crypto/ui.pod index 6df68d604a82a..04f8e9c360a85 100644 --- a/doc/crypto/ui.pod +++ b/doc/crypto/ui.pod @@ -119,7 +119,7 @@ verification will fail. UI_add_input_boolean() adds a prompt to the UI that's supposed to be answered in a boolean way, with a single character for yes and a different character for no. A set of characters that can be used to cancel the prompt is given -as well. The prompt itself is really divided in two, one part being the +as well. The prompt itself is divided in two, one part being the descriptive text (given through the I argument) and one describing the possible answers (given through the I argument).