From b3f0be3c61d66001c08e09eb0075bd81280d48c9 Mon Sep 17 00:00:00 2001 From: Dan Aloni Date: Fri, 24 Apr 2015 21:50:39 +0300 Subject: [PATCH] Changes representative of linux-3.10.0-229.4.2.el7.tar.xz --- Makefile | 2 +- arch/parisc/hpux/fs.c | 15 +- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/kernel/rtas.c | 22 +- arch/powerpc/platforms/pseries/mobility.c | 22 +- arch/x86/crypto/Makefile | 5 + arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 580 +++++++++++ arch/x86/crypto/aesni-intel_glue.c | 49 +- arch/x86/crypto/sha-mb/Makefile | 11 + arch/x86/crypto/sha-mb/sha1_mb.c | 935 ++++++++++++++++++ .../crypto/sha-mb/sha1_mb_mgr_datastruct.S | 287 ++++++ .../crypto/sha-mb/sha1_mb_mgr_flush_avx2.S | 327 ++++++ .../x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c | 64 ++ .../crypto/sha-mb/sha1_mb_mgr_submit_avx2.S | 228 +++++ arch/x86/crypto/sha-mb/sha1_x8_avx2.S | 472 +++++++++ arch/x86/crypto/sha-mb/sha_mb_ctx.h | 136 +++ arch/x86/crypto/sha-mb/sha_mb_mgr.h | 110 +++ arch/x86/crypto/sha1_avx2_x86_64_asm.S | 708 +++++++++++++ arch/x86/crypto/sha1_ssse3_glue.c | 53 +- arch/x86/crypto/sha256_ssse3_glue.c | 2 +- crypto/Kconfig | 34 +- crypto/Makefile | 1 + crypto/ahash.c | 51 +- crypto/ansi_cprng.c | 6 +- crypto/drbg.c | 7 +- crypto/mcryptd.c | 705 +++++++++++++ crypto/testmgr.c | 10 +- drivers/char/random.c | 219 ++-- drivers/char/tpm/tpm_ibmvtpm.c | 20 +- drivers/iommu/dmar.c | 23 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 1 + drivers/net/team/team.c | 4 +- drivers/s390/crypto/ap_bus.c | 14 +- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/zcrypt_api.c | 3 + fs/exec.c | 51 +- fs/gfs2/file.c | 20 +- fs/namei.c | 145 ++- fs/open.c | 10 +- include/crypto/internal/hash.h | 22 + include/crypto/mcryptd.h | 112 +++ include/linux/audit.h | 7 +- include/linux/binfmts.h | 1 - include/linux/dmar.h | 8 +- include/linux/fs.h | 12 +- include/linux/fsnotify.h | 6 +- include/linux/sched.h | 4 +- init/main.c | 2 +- kernel/audit.h | 25 +- kernel/auditfilter.c | 10 + kernel/auditsc.c | 140 +-- kernel/kmod.c | 2 +- kernel/sched/core.c | 12 + net/openvswitch/datapath.c | 45 +- net/openvswitch/vport.h | 2 + 55 files changed, 5417 insertions(+), 348 deletions(-) create mode 100644 arch/x86/crypto/aes_ctrby8_avx-x86_64.S create mode 100644 arch/x86/crypto/sha-mb/Makefile create mode 100644 arch/x86/crypto/sha-mb/sha1_mb.c create mode 100644 arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S create mode 100644 arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S create mode 100644 arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c create mode 100644 arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S create mode 100644 arch/x86/crypto/sha-mb/sha1_x8_avx2.S create mode 100644 arch/x86/crypto/sha-mb/sha_mb_ctx.h create mode 100644 arch/x86/crypto/sha-mb/sha_mb_mgr.h create mode 100644 arch/x86/crypto/sha1_avx2_x86_64_asm.S create mode 100644 crypto/mcryptd.c create mode 100644 include/crypto/mcryptd.h diff --git a/Makefile b/Makefile index 27aa12c55d7dae..cc83dba6aed813 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ EXTRAVERSION = NAME = Unicycling Gorilla RHEL_MAJOR = 7 RHEL_MINOR = 1 -RHEL_RELEASE = 229.1.2 +RHEL_RELEASE = 229.4.2 RHEL_DRM_VERSION = 3 RHEL_DRM_PATCHLEVEL = 16 RHEL_DRM_SUBLEVEL = 7 diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 838b479a42c4eb..47f0103bdb9687 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -33,22 +33,9 @@ int hpux_execve(struct pt_regs *regs) { - int error; - struct filename *filename; - - filename = getname((const char __user *) regs->gr[26]); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - - error = do_execve(filename->name, + return do_execve(getname((const char __user *) regs->gr[26]), (const char __user *const __user *) regs->gr[25], (const char __user *const __user *) regs->gr[24]); - - putname(filename); - -out: - return error; } struct hpux_dirent { diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b390f55b0df137..2e23e92a43722f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -327,7 +327,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); extern int rtas_online_cpus_mask(cpumask_var_t cpus); extern int rtas_offline_cpus_mask(cpumask_var_t cpus); -extern int rtas_ibm_suspend_me(struct rtas_args *); +extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return); struct rtc_time; extern unsigned long rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8cd5ed049b5db0..305d962d1f4450 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus) } EXPORT_SYMBOL(rtas_offline_cpus_mask); -int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(u64 handle, int *vasi_return) { long state; long rc; @@ -911,8 +911,7 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return -ENOSYS; /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, retbuf, - ((u64)args->args[0] << 32) | args->args[1]); + rc = plpar_hcall(H_VASI_STATE, retbuf, handle); state = retbuf[0]; @@ -920,12 +919,12 @@ int rtas_ibm_suspend_me(struct rtas_args *args) printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); return rc; } else if (state == H_VASI_ENABLED) { - args->args[args->nargs] = RTAS_NOT_SUSPENDABLE; + *vasi_return = RTAS_NOT_SUSPENDABLE; return 0; } else if (state != H_VASI_SUSPENDING) { printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", state); - args->args[args->nargs] = -1; + *vasi_return = -1; return 0; } @@ -973,7 +972,7 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(u64 handle, int *vasi_return) { return -ENOSYS; } @@ -1053,7 +1052,16 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { - rc = rtas_ibm_suspend_me(&args); + + /* + * rtas_ibm_suspend_me assumes args are in cpu endian, or at least the + * hcall within it requires it. + */ + int vasi_rc = 0; + u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) + | be32_to_cpu(args.args[1]); + rc = rtas_ibm_suspend_me(handle, &vasi_rc); + args.rets[0] = cpu_to_be32(vasi_rc); if (rc) return rc; goto copy_return; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index bde7ebad3949c9..450c6c6147d3e7 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -315,34 +315,24 @@ void post_mobility_fixup(void) static ssize_t migrate_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) { - struct rtas_args args; u64 streamid; int rc; + int vasi_rc = 0; rc = strict_strtoull(buf, 0, &streamid); if (rc) return rc; - memset(&args, 0, sizeof(args)); - args.token = rtas_token("ibm,suspend-me"); - args.nargs = 2; - args.nret = 1; - - args.args[0] = streamid >> 32 ; - args.args[1] = streamid & 0xffffffff; - args.rets = &args.args[args.nargs]; - do { - args.rets[0] = 0; - rc = rtas_ibm_suspend_me(&args); - if (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE) + rc = rtas_ibm_suspend_me(streamid, &vasi_rc); + if (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE) ssleep(1); - } while (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE); + } while (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE); if (rc) return rc; - else if (args.rets[0]) - return args.rets[0]; + if (vasi_rc) + return vasi_rc; post_mobility_fixup(); return count; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 94cb151adc1d92..56a17beb9dcdc3 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +obj-$(CONFIG_CRYPTO_SHA1_MB) += sha-mb/ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o @@ -81,8 +82,12 @@ ifeq ($(avx2_supported),yes) endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o +aesni-intel-$(CONFIG_64BIT) += aes_ctrby8_avx-x86_64.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o +ifeq ($(avx2_supported),yes) +sha1-ssse3-y += sha1_avx2_x86_64_asm.o +endif crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S new file mode 100644 index 00000000000000..a916c4a611652f --- /dev/null +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -0,0 +1,580 @@ +/* + * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64) + * + * This is AES128/192/256 CTR mode optimization implementation. It requires + * the support of Intel(R) AESNI and AVX instructions. + * + * This work was inspired by the AES CTR mode optimization published + * in Intel Optimized IPSEC Cryptograhpic library. + * Additional information on it can be found at: + * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Sean Gulley + * Chandramouli Narayanan + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include + +#define CONCAT(a,b) a##b +#define VMOVDQ vmovdqu + +#define xdata0 %xmm0 +#define xdata1 %xmm1 +#define xdata2 %xmm2 +#define xdata3 %xmm3 +#define xdata4 %xmm4 +#define xdata5 %xmm5 +#define xdata6 %xmm6 +#define xdata7 %xmm7 +#define xcounter %xmm8 +#define xbyteswap %xmm9 +#define xkey0 %xmm10 +#define xkey4 %xmm11 +#define xkey8 %xmm12 +#define xkey12 %xmm13 +#define xkeyA %xmm14 +#define xkeyB %xmm15 + +#define p_in %rdi +#define p_iv %rsi +#define p_keys %rdx +#define p_out %rcx +#define num_bytes %r8 + +#define tmp %r10 +#define DDQ(i) CONCAT(ddq_add_,i) +#define XMM(i) CONCAT(%xmm, i) +#define DDQ_DATA 0 +#define XDATA 1 +#define KEY_128 1 +#define KEY_192 2 +#define KEY_256 3 + +.section .rodata +.align 16 + +byteswap_const: + .octa 0x000102030405060708090A0B0C0D0E0F +ddq_low_msk: + .octa 0x0000000000000000FFFFFFFFFFFFFFFF +ddq_high_add_1: + .octa 0x00000000000000010000000000000000 +ddq_add_1: + .octa 0x00000000000000000000000000000001 +ddq_add_2: + .octa 0x00000000000000000000000000000002 +ddq_add_3: + .octa 0x00000000000000000000000000000003 +ddq_add_4: + .octa 0x00000000000000000000000000000004 +ddq_add_5: + .octa 0x00000000000000000000000000000005 +ddq_add_6: + .octa 0x00000000000000000000000000000006 +ddq_add_7: + .octa 0x00000000000000000000000000000007 +ddq_add_8: + .octa 0x00000000000000000000000000000008 + +.text + +/* generate a unique variable for ddq_add_x */ + +.macro setddq n + var_ddq_add = DDQ(\n) +.endm + +/* generate a unique variable for xmm register */ +.macro setxdata n + var_xdata = XMM(\n) +.endm + +/* club the numeric 'id' to the symbol 'name' */ + +.macro club name, id +.altmacro + .if \name == DDQ_DATA + setddq %\id + .elseif \name == XDATA + setxdata %\id + .endif +.noaltmacro +.endm + +/* + * do_aes num_in_par load_keys key_len + * This increments p_in, but not p_out + */ +.macro do_aes b, k, key_len + .set by, \b + .set load_keys, \k + .set klen, \key_len + + .if (load_keys) + vmovdqa 0*16(p_keys), xkey0 + .endif + + vpshufb xbyteswap, xcounter, xdata0 + + .set i, 1 + .rept (by - 1) + club DDQ_DATA, i + club XDATA, i + vpaddq var_ddq_add(%rip), xcounter, var_xdata + vptest ddq_low_msk(%rip), var_xdata + jnz 1f + vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: + vpshufb xbyteswap, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 1*16(p_keys), xkeyA + + vpxor xkey0, xdata0, xdata0 + club DDQ_DATA, by + vpaddq var_ddq_add(%rip), xcounter, xcounter + vptest ddq_low_msk(%rip), xcounter + jnz 1f + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: + + .set i, 1 + .rept (by - 1) + club XDATA, i + vpxor xkey0, var_xdata, var_xdata + .set i, (i +1) + .endr + + vmovdqa 2*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 1 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 3*16(p_keys), xkey4 + .endif + .else + vmovdqa 3*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyB, var_xdata, var_xdata /* key 2 */ + .set i, (i +1) + .endr + + add $(16*by), p_in + + .if (klen == KEY_128) + vmovdqa 4*16(p_keys), xkeyB + .else + .if (load_keys) + vmovdqa 4*16(p_keys), xkey4 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 3 */ + .if (klen == KEY_128) + vaesenc xkey4, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + vmovdqa 5*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + /* key 4 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey4, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 6*16(p_keys), xkey8 + .endif + .else + vmovdqa 6*16(p_keys), xkeyB + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 5 */ + .set i, (i +1) + .endr + + vmovdqa 7*16(p_keys), xkeyA + + .set i, 0 + .rept by + club XDATA, i + /* key 6 */ + .if (klen == KEY_128) + vaesenc xkey8, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_128) + vmovdqa 8*16(p_keys), xkeyB + .else + .if (load_keys) + vmovdqa 8*16(p_keys), xkey8 + .endif + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 7 */ + .set i, (i +1) + .endr + + .if (klen == KEY_128) + .if (load_keys) + vmovdqa 9*16(p_keys), xkey12 + .endif + .else + vmovdqa 9*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 8 */ + .if (klen == KEY_128) + vaesenc xkeyB, var_xdata, var_xdata + .else + vaesenc xkey8, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + vmovdqa 10*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + /* key 9 */ + .if (klen == KEY_128) + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenc xkeyA, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen != KEY_128) + vmovdqa 11*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + /* key 10 */ + .if (klen == KEY_128) + vaesenclast xkeyB, var_xdata, var_xdata + .else + vaesenc xkeyB, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen != KEY_128) + .if (load_keys) + vmovdqa 12*16(p_keys), xkey12 + .endif + + .set i, 0 + .rept by + club XDATA, i + vaesenc xkeyA, var_xdata, var_xdata /* key 11 */ + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 13*16(p_keys), xkeyA + .endif + + .set i, 0 + .rept by + club XDATA, i + .if (klen == KEY_256) + /* key 12 */ + vaesenc xkey12, var_xdata, var_xdata + .else + vaesenclast xkey12, var_xdata, var_xdata + .endif + .set i, (i +1) + .endr + + .if (klen == KEY_256) + vmovdqa 14*16(p_keys), xkeyB + + .set i, 0 + .rept by + club XDATA, i + /* key 13 */ + vaesenc xkeyA, var_xdata, var_xdata + .set i, (i +1) + .endr + + .set i, 0 + .rept by + club XDATA, i + /* key 14 */ + vaesenclast xkeyB, var_xdata, var_xdata + .set i, (i +1) + .endr + .endif + .endif + + .set i, 0 + .rept (by / 2) + .set j, (i+1) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + VMOVDQ (j*16 - 16*by)(p_in), xkeyB + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + club XDATA, j + vpxor xkeyB, var_xdata, var_xdata + .set i, (i+2) + .endr + + .if (i < by) + VMOVDQ (i*16 - 16*by)(p_in), xkeyA + club XDATA, i + vpxor xkeyA, var_xdata, var_xdata + .endif + + .set i, 0 + .rept by + club XDATA, i + VMOVDQ var_xdata, i*16(p_out) + .set i, (i+1) + .endr +.endm + +.macro do_aes_load val, key_len + do_aes \val, 1, \key_len +.endm + +.macro do_aes_noload val, key_len + do_aes \val, 0, \key_len +.endm + +/* main body of aes ctr load */ + +.macro do_aes_ctrmain key_len + cmp $16, num_bytes + jb .Ldo_return2\key_len + + vmovdqa byteswap_const(%rip), xbyteswap + vmovdqu (p_iv), xcounter + vpshufb xbyteswap, xcounter, xcounter + + mov num_bytes, tmp + and $(7*16), tmp + jz .Lmult_of_8_blks\key_len + + /* 1 <= tmp <= 7 */ + cmp $(4*16), tmp + jg .Lgt4\key_len + je .Leq4\key_len + +.Llt4\key_len: + cmp $(2*16), tmp + jg .Leq3\key_len + je .Leq2\key_len + +.Leq1\key_len: + do_aes_load 1, \key_len + add $(1*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq2\key_len: + do_aes_load 2, \key_len + add $(2*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + + +.Leq3\key_len: + do_aes_load 3, \key_len + add $(3*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq4\key_len: + do_aes_load 4, \key_len + add $(4*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Lgt4\key_len: + cmp $(6*16), tmp + jg .Leq7\key_len + je .Leq6\key_len + +.Leq5\key_len: + do_aes_load 5, \key_len + add $(5*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq6\key_len: + do_aes_load 6, \key_len + add $(6*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Leq7\key_len: + do_aes_load 7, \key_len + add $(7*16), p_out + and $(~7*16), num_bytes + jz .Ldo_return2\key_len + jmp .Lmain_loop2\key_len + +.Lmult_of_8_blks\key_len: + .if (\key_len != KEY_128) + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 4*16(p_keys), xkey4 + vmovdqa 8*16(p_keys), xkey8 + vmovdqa 12*16(p_keys), xkey12 + .else + vmovdqa 0*16(p_keys), xkey0 + vmovdqa 3*16(p_keys), xkey4 + vmovdqa 6*16(p_keys), xkey8 + vmovdqa 9*16(p_keys), xkey12 + .endif +.align 16 +.Lmain_loop2\key_len: + /* num_bytes is a multiple of 8 and >0 */ + do_aes_noload 8, \key_len + add $(8*16), p_out + sub $(8*16), num_bytes + jne .Lmain_loop2\key_len + +.Ldo_return2\key_len: + /* return updated IV */ + vpshufb xbyteswap, xcounter, xcounter + vmovdqu xcounter, (p_iv) + ret +.endm + +/* + * routine to do AES128 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_128_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_128 + +ENDPROC(aes_ctr_enc_128_avx_by8) + +/* + * routine to do AES192 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_192_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_192 + +ENDPROC(aes_ctr_enc_192_avx_by8) + +/* + * routine to do AES256 CTR enc/decrypt "by8" + * XMM registers are clobbered. + * Saving/restoring must be done at a higher level + * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out, + * unsigned int num_bytes) + */ +ENTRY(aes_ctr_enc_256_avx_by8) + /* call the aes main loop */ + do_aes_ctrmain KEY_256 + +ENDPROC(aes_ctr_enc_256_avx_by8) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 767639878a2f32..839cf65de6b834 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -103,6 +103,9 @@ int crypto_fpu_init(void); void crypto_fpu_exit(void); #ifdef CONFIG_X86_64 + +static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); @@ -151,6 +154,15 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); +#ifdef CONFIG_AS_AVX +asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, + void *keys, u8 *out, unsigned int num_bytes); +#endif + static inline struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) { @@ -353,6 +365,25 @@ static void ctr_crypt_final(struct crypto_aes_ctx *ctx, crypto_inc(ctrblk, AES_BLOCK_SIZE); } +#ifdef CONFIG_AS_AVX +static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv) +{ + /* + * based on key length, override with the by8 version + * of ctr mode encryption/decryption for improved performance + * aes_set_key_common() ensures that key length is one of + * {128,192,256} + */ + if (ctx->key_length == AES_KEYSIZE_128) + aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); + else if (ctx->key_length == AES_KEYSIZE_192) + aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); + else + aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); +} +#endif + static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) @@ -367,8 +398,8 @@ static int ctr_crypt(struct blkcipher_desc *desc, kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK, walk.iv); + aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } @@ -1007,7 +1038,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); if (!src) return -ENOMEM; - assoc = (src + req->cryptlen + auth_tag_len); + assoc = (src + req->cryptlen); scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); scatterwalk_map_and_copy(assoc, req->assoc, 0, req->assoclen, 0); @@ -1032,7 +1063,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) scatterwalk_done(&src_sg_walk, 0, 0); scatterwalk_done(&assoc_sg_walk, 0, 0); } else { - scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); + scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1); kfree(src); } return retval; @@ -1371,6 +1402,16 @@ static int __init aesni_init(void) if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; +#ifdef CONFIG_X86_64 + aesni_ctr_enc_tfm = aesni_ctr_enc; +#ifdef CONFIG_AS_AVX + if (cpu_has_avx) { + /* optimize performance of ctr mode encryption transform */ + aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm; + pr_info("AES CTR mode by8 optimization enabled\n"); + } +#endif +#endif err = crypto_fpu_init(); if (err) diff --git a/arch/x86/crypto/sha-mb/Makefile b/arch/x86/crypto/sha-mb/Makefile new file mode 100644 index 00000000000000..2f8756375df54a --- /dev/null +++ b/arch/x86/crypto/sha-mb/Makefile @@ -0,0 +1,11 @@ +# +# Arch-specific CryptoAPI modules. +# + +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ + $(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o + sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \ + sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o +endif diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c new file mode 100644 index 00000000000000..99eefd81295846 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -0,0 +1,935 @@ +/* + * Multi buffer SHA1 algorithm Glue Code + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sha_mb_ctx.h" + +#define FLUSH_INTERVAL 1000 /* in usec */ + +static struct mcryptd_alg_state sha1_mb_alg_state; + +struct sha1_mb_ctx { + struct mcryptd_ahash *mcryptd_tfm; +}; + +static inline struct mcryptd_hash_request_ctx *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) +{ + struct shash_desc *desc; + + desc = container_of((void *) hash_ctx, struct shash_desc, __ctx); + return container_of(desc, struct mcryptd_hash_request_ctx, desc); +} + +static inline struct ahash_request *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) +{ + return container_of((void *) ctx, struct ahash_request, __ctx); +} + +static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, + struct shash_desc *desc) +{ + rctx->flag = HASH_UPDATE; +} + +static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_submit)(struct sha1_mb_mgr *state, + struct job_sha1 *job); +static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)(struct sha1_mb_mgr *state); +static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)(struct sha1_mb_mgr *state); + +inline void sha1_init_digest(uint32_t *digest) +{ + static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, + SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; + memcpy(digest, initial_digest, sizeof(initial_digest)); +} + +inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], + uint32_t total_len) +{ + uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); + + memset(&padblock[i], 0, SHA1_BLOCK_SIZE); + padblock[i] = 0x80; + + i += ((SHA1_BLOCK_SIZE - 1) & + (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + + 1 + SHA1_PADLENGTHFIELD_SIZE; + +#if SHA1_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) &padblock[i - 16]) = 0; +#endif + + *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); + + /* Number of extra blocks to hash */ + return i >> SHA1_LOG2_BLOCK_SIZE; +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, struct sha1_hash_ctx *ctx) +{ + while (ctx) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { + /* Clear PROCESSING bit */ + ctx->status = HASH_CTX_STS_COMPLETE; + return ctx; + } + + /* + * If the extra blocks are empty, begin hashing what remains + * in the user's buffer. + */ + if (ctx->partial_block_buffer_length == 0 && + ctx->incoming_buffer_length) { + + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + uint32_t copy_len; + + /* + * Only entire blocks can be hashed. + * Copy remainder to extra blocks buffer. + */ + copy_len = len & (SHA1_BLOCK_SIZE-1); + + if (copy_len) { + len -= copy_len; + memcpy(ctx->partial_block_buffer, + ((const char *) buffer + len), + copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + /* len should be a multiple of the block size now */ + assert((len % SHA1_BLOCK_SIZE) == 0); + + /* Set len to the number of blocks to be hashed */ + len >>= SHA1_LOG2_BLOCK_SIZE; + + if (len) { + + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, + &ctx->job); + continue; + } + } + + /* + * If the extra blocks are not empty, then we are + * either on the last block(s) or we need more + * user input before continuing. + */ + if (ctx->status & HASH_CTX_STS_LAST) { + + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = sha1_pad(buf, ctx->total_length); + + ctx->status = (HASH_CTX_STS_PROCESSING | + HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) +{ + /* + * If get_comp_job returns NULL, there are no jobs complete. + * If get_comp_job returns a job, verify that it is safe to return to the user. + * If it is not ready, resubmit the job to finish processing. + * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. + * Otherwise, all jobs currently being managed by the hash_ctx_mgr still need processing. + */ + struct sha1_hash_ctx *ctx; + + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr); + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr) +{ + sha1_job_mgr_init(&mgr->mgr); +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, + struct sha1_hash_ctx *ctx, + const void *buffer, + uint32_t len, + int flags) +{ + if (flags & (~HASH_ENTIRE)) { + /* User should not pass anything other than FIRST, UPDATE, or LAST */ + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + /* Cannot submit to a currently processing job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + /* Cannot update a finished job. */ + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + + if (flags & HASH_FIRST) { + /* Init digest */ + sha1_init_digest(ctx->job.result_digest); + + /* Reset byte counter */ + ctx->total_length = 0; + + /* Clear extra blocks */ + ctx->partial_block_buffer_length = 0; + } + + /* If we made it here, there were no errors during this call to submit */ + ctx->error = HASH_CTX_ERROR_NONE; + + /* Store buffer ptr info from user */ + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + /* Store the user's request flags and mark this ctx as currently being processed. */ + ctx->status = (flags & HASH_LAST) ? + (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + /* Advance byte counter */ + ctx->total_length += len; + + /* + * If there is anything currently buffered in the extra blocks, + * append to it until it contains a whole block. + * Or if the user's buffer contains less than a whole block, + * append as much as possible to the extra block. + */ + if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + /* Compute how many bytes to copy from user buffer into extra block */ + uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + /* Copy and update relevant pointers and counters */ + memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length], + buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + + /* The extra block should never contain more than 1 block here */ + assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); + + /* If the extra block buffer contains exactly 1 block, it can be hashed. */ + if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_submit(&mgr->mgr, &ctx->job); + } + } + + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) +{ + struct sha1_hash_ctx *ctx; + + while (1) { + ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr); + + /* If flush returned 0, there are no more jobs in flight. */ + if (!ctx) + return NULL; + + /* + * If flush returned a job, resubmit the job to finish processing. + */ + ctx = sha1_ctx_mgr_resubmit(mgr, ctx); + + /* + * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. + * Otherwise, all jobs currently being managed by the sha1_ctx_mgr + * still need processing. Loop. + */ + if (ctx) + return ctx; + } +} + +static int sha1_mb_init(struct shash_desc *desc) +{ + struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + + hash_ctx_init(sctx); + sctx->job.result_digest[0] = SHA1_H0; + sctx->job.result_digest[1] = SHA1_H1; + sctx->job.result_digest[2] = SHA1_H2; + sctx->job.result_digest[3] = SHA1_H3; + sctx->job.result_digest[4] = SHA1_H4; + sctx->total_length = 0; + sctx->partial_block_buffer_length = 0; + sctx->status = HASH_CTX_STS_IDLE; + + return 0; +} + +static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) +{ + int i; + struct sha1_hash_ctx *sctx = shash_desc_ctx(&rctx->desc); + __be32 *dst = (__be32 *) rctx->out; + + for (i = 0; i < 5; ++i) + dst[i] = cpu_to_be32(sctx->job.result_digest[i]); + + return 0; +} + +static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, + struct mcryptd_alg_cstate *cstate, bool flush) +{ + int flag = HASH_UPDATE; + int nbytes, err = 0; + struct mcryptd_hash_request_ctx *rctx = *ret_rctx; + struct sha1_hash_ctx *sha_ctx; + + /* more work ? */ + while (!(rctx->flag & HASH_DONE)) { + nbytes = crypto_ahash_walk_done(&rctx->walk, 0); + if (nbytes < 0) { + err = nbytes; + goto out; + } + /* check if the walk is done */ + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + if (rctx->flag & HASH_FINAL) + flag |= HASH_LAST; + + } + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(&rctx->desc); + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + if (!sha_ctx) { + if (flush) + sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); + } + kernel_fpu_end(); + if (sha_ctx) + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + else { + rctx = NULL; + goto out; + } + } + + /* copy the results */ + if (rctx->flag & HASH_FINAL) + sha1_mb_set_results(rctx); + +out: + *ret_rctx = rctx; + return err; +} + +static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate, + int err) +{ + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha1_hash_ctx *sha_ctx; + struct mcryptd_hash_request_ctx *req_ctx; + int ret; + + /* remove from work list */ + spin_lock(&cstate->work_lock); + list_del(&rctx->waiter); + spin_unlock(&cstate->work_lock); + + if (irqs_disabled()) + rctx->complete(&req->base, err); + else { + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + } + + /* check to see if there are other jobs that are done */ + sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); + while (sha_ctx) { + req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&req_ctx, cstate, false); + if (req_ctx) { + spin_lock(&cstate->work_lock); + list_del(&req_ctx->waiter); + spin_unlock(&cstate->work_lock); + + req = cast_mcryptd_ctx_to_req(req_ctx); + if (irqs_disabled()) + rctx->complete(&req->base, ret); + else { + local_bh_disable(); + rctx->complete(&req->base, ret); + local_bh_enable(); + } + } + sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); + } + + return 0; +} + +static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, + struct mcryptd_alg_cstate *cstate) +{ + unsigned long next_flush; + unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + + /* initialize tag */ + rctx->tag.arrival = jiffies; /* tag the arrival time */ + rctx->tag.seq_num = cstate->next_seq_num++; + next_flush = rctx->tag.arrival + delay; + rctx->tag.expire = next_flush; + + spin_lock(&cstate->work_lock); + list_add_tail(&rctx->waiter, &cstate->work_list); + spin_unlock(&cstate->work_lock); + + mcryptd_arm_flusher(cstate, delay); +} + +static int sha1_mb_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(desc, struct mcryptd_hash_request_ctx, desc); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha1_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha1_hash_ctx *sha_ctx; + int ret = 0, nbytes; + + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, desc); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) + rctx->flag |= HASH_DONE; + + /* submit */ + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha1_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha1_mb_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(desc, struct mcryptd_hash_request_ctx, desc); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha1_mb_alg_state.alg_cstate); + + struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); + struct sha1_hash_ctx *sha_ctx; + int ret = 0, flag = HASH_UPDATE, nbytes; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, desc); + + nbytes = crypto_ahash_walk_first(req, &rctx->walk); + + if (nbytes < 0) { + ret = nbytes; + goto done; + } + + if (crypto_ahash_walk_last(&rctx->walk)) { + rctx->flag |= HASH_DONE; + flag = HASH_LAST; + } + rctx->out = out; + + /* submit */ + rctx->flag |= HASH_FINAL; + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + sha1_mb_add_list(rctx, cstate); + + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, nbytes, flag); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha1_mb_final(struct shash_desc *desc, u8 *out) +{ + struct mcryptd_hash_request_ctx *rctx = + container_of(desc, struct mcryptd_hash_request_ctx, desc); + struct mcryptd_alg_cstate *cstate = + this_cpu_ptr(sha1_mb_alg_state.alg_cstate); + + struct sha1_hash_ctx *sha_ctx; + int ret = 0; + u8 data; + + /* sanity check */ + if (rctx->tag.cpu != smp_processor_id()) { + pr_err("mcryptd error: cpu clash\n"); + goto done; + } + + /* need to init context */ + req_ctx_init(rctx, desc); + + rctx->out = out; + rctx->flag |= HASH_DONE | HASH_FINAL; + + sha_ctx = (struct sha1_hash_ctx *) shash_desc_ctx(desc); + /* flag HASH_FINAL and 0 data size */ + sha1_mb_add_list(rctx, cstate); + kernel_fpu_begin(); + sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, HASH_LAST); + kernel_fpu_end(); + + /* check if anything is returned */ + if (!sha_ctx) + return -EINPROGRESS; + + if (sha_ctx->error) { + ret = sha_ctx->error; + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + goto done; + } + + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + ret = sha_finish_walk(&rctx, cstate, false); + if (!rctx) + return -EINPROGRESS; +done: + sha_complete_job(rctx, cstate, ret); + return ret; +} + +static int sha1_mb_export(struct shash_desc *desc, void *out) +{ + struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_mb_import(struct shash_desc *desc, const void *in) +{ + struct sha1_hash_ctx *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + + +static struct shash_alg sha1_mb_shash_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_mb_init, + .update = sha1_mb_update, + .final = sha1_mb_final, + .finup = sha1_mb_finup, + .export = sha1_mb_export, + .import = sha1_mb_import, + .descsize = sizeof(struct sha1_hash_ctx), + .statesize = sizeof(struct sha1_hash_ctx), + .base = { + .cra_name = "__sha1-mb", + .cra_driver_name = "__intel_sha1-mb", + .cra_priority = 100, + /* + * use ASYNC flag as some buffers in multi-buffer + * algo may not have completed before hashing thread sleep + */ + .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list), + } +}; + +static int sha1_mb_async_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_init(mcryptd_req); +} + +static int sha1_mb_async_update(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_update(mcryptd_req); +} + +static int sha1_mb_async_finup(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_finup(mcryptd_req); +} + +static int sha1_mb_async_final(struct ahash_request *req) +{ + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_final(mcryptd_req); +} + +static int sha1_mb_async_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); + struct ahash_request *mcryptd_req = ahash_request_ctx(req); + struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; + + memcpy(mcryptd_req, req, sizeof(*req)); + ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); + return crypto_ahash_digest(mcryptd_req); +} + +static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_ahash *mcryptd_tfm; + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + struct mcryptd_hash_ctx *mctx; + + mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0); + if (IS_ERR(mcryptd_tfm)) + return PTR_ERR(mcryptd_tfm); + mctx = crypto_ahash_ctx(&mcryptd_tfm->base); + mctx->alg_state = &sha1_mb_alg_state; + ctx->mcryptd_tfm = mcryptd_tfm; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(&mcryptd_tfm->base)); + + return 0; +} + +static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) +{ + struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); + + mcryptd_free_ahash(ctx->mcryptd_tfm); +} + +static struct ahash_alg sha1_mb_async_alg = { + .init = sha1_mb_async_init, + .update = sha1_mb_async_update, + .final = sha1_mb_async_final, + .finup = sha1_mb_async_finup, + .digest = sha1_mb_async_digest, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1_mb", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_type = &crypto_ahash_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list), + .cra_init = sha1_mb_async_init_tfm, + .cra_exit = sha1_mb_async_exit_tfm, + .cra_ctxsize = sizeof(struct sha1_mb_ctx), + .cra_alignmask = 0, + }, + }, +}; + +static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) +{ + struct mcryptd_hash_request_ctx *rctx; + unsigned long cur_time; + unsigned long next_flush = 0; + struct sha1_hash_ctx *sha_ctx; + + + cur_time = jiffies; + + while (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + if time_before(cur_time, rctx->tag.expire) + break; + kernel_fpu_begin(); + sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr); + kernel_fpu_end(); + if (!sha_ctx) { + pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); + break; + } + rctx = cast_hash_to_mcryptd_ctx(sha_ctx); + sha_finish_walk(&rctx, cstate, true); + sha_complete_job(rctx, cstate, 0); + } + + if (!list_empty(&cstate->work_list)) { + rctx = list_entry(cstate->work_list.next, + struct mcryptd_hash_request_ctx, waiter); + /* get the hash context and then flush time */ + next_flush = rctx->tag.expire; + mcryptd_arm_flusher(cstate, get_delay(next_flush)); + } + return next_flush; +} + +static int __init sha1_mb_mod_init(void) +{ + + int cpu; + int err; + struct mcryptd_alg_cstate *cpu_state; + + /* check for dependent cpu features */ + if (!boot_cpu_has(X86_FEATURE_AVX2) || + !boot_cpu_has(X86_FEATURE_BMI2)) + return -ENODEV; + + /* initialize multibuffer structures */ + sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate); + + sha1_job_mgr_init = sha1_mb_mgr_init_avx2; + sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2; + sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2; + sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2; + + if (!sha1_mb_alg_state.alg_cstate) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); + cpu_state->next_flush = 0; + cpu_state->next_seq_num = 0; + cpu_state->flusher_engaged = false; + INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); + cpu_state->cpu = cpu; + cpu_state->alg_state = &sha1_mb_alg_state; + cpu_state->mgr = (struct sha1_ctx_mgr *) kzalloc(sizeof(struct sha1_ctx_mgr), GFP_KERNEL); + if (!cpu_state->mgr) + goto err2; + sha1_ctx_mgr_init(cpu_state->mgr); + INIT_LIST_HEAD(&cpu_state->work_list); + spin_lock_init(&cpu_state->work_lock); + } + sha1_mb_alg_state.flusher = &sha1_mb_flusher; + + err = crypto_register_shash(&sha1_mb_shash_alg); + if (err) + goto err2; + err = crypto_register_ahash(&sha1_mb_async_alg); + if (err) + goto err1; + + + return 0; +err1: + crypto_unregister_shash(&sha1_mb_shash_alg); +err2: + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha1_mb_alg_state.alg_cstate); + return -ENODEV; +} + +static void __exit sha1_mb_mod_fini(void) +{ + int cpu; + struct mcryptd_alg_cstate *cpu_state; + + crypto_unregister_ahash(&sha1_mb_async_alg); + crypto_unregister_shash(&sha1_mb_shash_alg); + for_each_possible_cpu(cpu) { + cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); + kfree(cpu_state->mgr); + } + free_percpu(sha1_mb_alg_state.alg_cstate); +} + +module_init(sha1_mb_mod_init); +module_exit(sha1_mb_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated"); + +MODULE_ALIAS("sha1"); diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S new file mode 100644 index 00000000000000..86688c6e7a25bb --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_datastruct.S @@ -0,0 +1,287 @@ +/* + * Header file for multi buffer SHA1 algorithm data structure + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +# Macros for defining data structures + +# Usage example + +#START_FIELDS # JOB_AES +### name size align +#FIELD _plaintext, 8, 8 # pointer to plaintext +#FIELD _ciphertext, 8, 8 # pointer to ciphertext +#FIELD _IV, 16, 8 # IV +#FIELD _keys, 8, 8 # pointer to keys +#FIELD _len, 4, 4 # length in bytes +#FIELD _status, 4, 4 # status enumeration +#FIELD _user_data, 8, 8 # pointer to user data +#UNION _union, size1, align1, \ +# size2, align2, \ +# size3, align3, \ +# ... +#END_FIELDS +#%assign _JOB_AES_size _FIELD_OFFSET +#%assign _JOB_AES_align _STRUCT_ALIGN + +######################################################################### + +# Alternate "struc-like" syntax: +# STRUCT job_aes2 +# RES_Q .plaintext, 1 +# RES_Q .ciphertext, 1 +# RES_DQ .IV, 1 +# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN +# RES_U .union, size1, align1, \ +# size2, align2, \ +# ... +# ENDSTRUCT +# # Following only needed if nesting +# %assign job_aes2_size _FIELD_OFFSET +# %assign job_aes2_align _STRUCT_ALIGN +# +# RES_* macros take a name, a count and an optional alignment. +# The count in in terms of the base size of the macro, and the +# default alignment is the base size. +# The macros are: +# Macro Base size +# RES_B 1 +# RES_W 2 +# RES_D 4 +# RES_Q 8 +# RES_DQ 16 +# RES_Y 32 +# RES_Z 64 +# +# RES_U defines a union. It's arguments are a name and two or more +# pairs of "size, alignment" +# +# The two assigns are only needed if this structure is being nested +# within another. Even if the assigns are not done, one can still use +# STRUCT_NAME_size as the size of the structure. +# +# Note that for nesting, you still need to assign to STRUCT_NAME_size. +# +# The differences between this and using "struc" directly are that each +# type is implicitly aligned to its natural length (although this can be +# over-ridden with an explicit third parameter), and that the structure +# is padded at the end to its overall alignment. +# + +######################################################################### + +#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_ +#define _SHA1_MB_MGR_DATASTRUCT_ASM_ + +## START_FIELDS +.macro START_FIELDS + _FIELD_OFFSET = 0 + _STRUCT_ALIGN = 0 +.endm + +## FIELD name size align +.macro FIELD name size align + _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + \name = _FIELD_OFFSET + _FIELD_OFFSET = _FIELD_OFFSET + (\size) +.if (\align > _STRUCT_ALIGN) + _STRUCT_ALIGN = \align +.endif +.endm + +## END_FIELDS +.macro END_FIELDS + _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +.endm + +######################################################################## + +.macro STRUCT p1 +START_FIELDS +.struc \p1 +.endm + +.macro ENDSTRUCT + tmp = _FIELD_OFFSET + END_FIELDS + tmp = (_FIELD_OFFSET - %%tmp) +.if (tmp > 0) + .lcomm tmp +.endif +.endstruc +.endm + +## RES_int name size align +.macro RES_int p1 p2 p3 + name = \p1 + size = \p2 + align = .\p3 + + _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) +.align align +.lcomm name size + _FIELD_OFFSET = _FIELD_OFFSET + (size) +.if (align > _STRUCT_ALIGN) + _STRUCT_ALIGN = align +.endif +.endm + + + +# macro RES_B name, size [, align] +.macro RES_B _name, _size, _align=1 +RES_int _name _size _align +.endm + +# macro RES_W name, size [, align] +.macro RES_W _name, _size, _align=2 +RES_int _name 2*(_size) _align +.endm + +# macro RES_D name, size [, align] +.macro RES_D _name, _size, _align=4 +RES_int _name 4*(_size) _align +.endm + +# macro RES_Q name, size [, align] +.macro RES_Q _name, _size, _align=8 +RES_int _name 8*(_size) _align +.endm + +# macro RES_DQ name, size [, align] +.macro RES_DQ _name, _size, _align=16 +RES_int _name 16*(_size) _align +.endm + +# macro RES_Y name, size [, align] +.macro RES_Y _name, _size, _align=32 +RES_int _name 32*(_size) _align +.endm + +# macro RES_Z name, size [, align] +.macro RES_Z _name, _size, _align=64 +RES_int _name 64*(_size) _align +.endm + + +#endif + +######################################################################## +#### Define constants +######################################################################## + +######################################################################## +#### Define SHA1 Out Of Order Data Structures +######################################################################## + +START_FIELDS # LANE_DATA +### name size align +FIELD _job_in_lane, 8, 8 # pointer to job object +END_FIELDS + +_LANE_DATA_size = _FIELD_OFFSET +_LANE_DATA_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # SHA1_ARGS_X8 +### name size align +FIELD _digest, 4*5*8, 16 # transposed digest +FIELD _data_ptr, 8*8, 8 # array of pointers to data +END_FIELDS + +_SHA1_ARGS_X4_size = _FIELD_OFFSET +_SHA1_ARGS_X4_align = _STRUCT_ALIGN +_SHA1_ARGS_X8_size = _FIELD_OFFSET +_SHA1_ARGS_X8_align = _STRUCT_ALIGN + +######################################################################## + +START_FIELDS # MB_MGR +### name size align +FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align +FIELD _lens, 4*8, 8 +FIELD _unused_lanes, 8, 8 +FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align +END_FIELDS + +_MB_MGR_size = _FIELD_OFFSET +_MB_MGR_align = _STRUCT_ALIGN + +_args_digest = _args + _digest +_args_data_ptr = _args + _data_ptr + + +######################################################################## +#### Define constants +######################################################################## + +#define STS_UNKNOWN 0 +#define STS_BEING_PROCESSED 1 +#define STS_COMPLETED 2 + +######################################################################## +#### Define JOB_SHA1 structure +######################################################################## + +START_FIELDS # JOB_SHA1 + +### name size align +FIELD _buffer, 8, 8 # pointer to buffer +FIELD _len, 4, 4 # length in bytes +FIELD _result_digest, 5*4, 32 # Digest (output) +FIELD _status, 4, 4 +FIELD _user_data, 8, 8 +END_FIELDS + +_JOB_SHA1_size = _FIELD_OFFSET +_JOB_SHA1_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S new file mode 100644 index 00000000000000..85c4e1cf717235 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S @@ -0,0 +1,327 @@ +/* + * Flush routine for SHA1 multibuffer + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include "sha1_mb_mgr_datastruct.S" + + +.extern sha1_x8_avx2 + +# LINUX register definitions +#define arg1 %rdi +#define arg2 %rsi + +# Common definitions +#define state arg1 +#define job arg2 +#define len2 arg2 + +# idx must be a register not clobbered by sha1_x8_avx2 +#define idx %r8 +#define DWORD_idx %r8d + +#define unused_lanes %rbx +#define lane_data %rbx +#define tmp2 %rbx +#define tmp2_w %ebx + +#define job_rax %rax +#define tmp1 %rax +#define size_offset %rax +#define tmp %rax +#define start_offset %rax + +#define tmp3 %arg1 + +#define extra_blocks %arg2 +#define p %arg2 + + +# STACK_SPACE needs to be an odd multiple of 8 +_XMM_SAVE_SIZE = 10*16 +_GPR_SAVE_SIZE = 8*8 +_ALIGN_SIZE = 8 + +_XMM_SAVE = 0 +_GPR_SAVE = _XMM_SAVE + _XMM_SAVE_SIZE +STACK_SPACE = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE + +.macro LABEL prefix n +\prefix\n\(): +.endm + +.macro JNE_SKIP i +jne skip_\i +.endm + +.altmacro +.macro SET_OFFSET _offset +offset = \_offset +.endm +.noaltmacro + +# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) +# arg 1 : rcx : state +ENTRY(sha1_mb_mgr_flush_avx2) + mov %rsp, %r10 + sub $STACK_SPACE, %rsp + and $~31, %rsp + mov %rbx, _GPR_SAVE(%rsp) + mov %r10, _GPR_SAVE+8*1(%rsp) #save rsp + mov %rbp, _GPR_SAVE+8*3(%rsp) + mov %r12, _GPR_SAVE+8*4(%rsp) + mov %r13, _GPR_SAVE+8*5(%rsp) + mov %r14, _GPR_SAVE+8*6(%rsp) + mov %r15, _GPR_SAVE+8*7(%rsp) + + # If bit (32+3) is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $32+3, unused_lanes + jc return_null + + # find a lane with a non-null job + xor idx, idx + offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne one(%rip), idx + offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne two(%rip), idx + offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne three(%rip), idx + offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne four(%rip), idx + offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne five(%rip), idx + offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne six(%rip), idx + offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) + cmovne seven(%rip), idx + + # copy idx to empty lanes +copy_lane_data: + offset = (_args + _data_ptr) + mov offset(state,idx,8), tmp + + I = 0 +.rep 8 + offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) + cmpq $0, offset(state) +.altmacro + JNE_SKIP %I + offset = (_args + _data_ptr + 8*I) + mov tmp, offset(state) + offset = (_lens + 4*I) + movl $0xFFFFFFFF, offset(state) +LABEL skip_ %I + I = (I+1) +.noaltmacro +.endr + + # Find min length + vmovdqa _lens+0*16(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens+0*16(state) + vmovdqa %xmm1, _lens+1*16(state) + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha1_x8_avx2 + # state and idx are intact + + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state , idx, 4) , %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + movl _args_digest+4*32(state, idx, 4), tmp2_w + + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + mov tmp2_w, offset(job_rax) + +return: + + mov _GPR_SAVE(%rsp), %rbx + mov _GPR_SAVE+8*1(%rsp), %r10 #saved rsp + mov _GPR_SAVE+8*3(%rsp), %rbp + mov _GPR_SAVE+8*4(%rsp), %r12 + mov _GPR_SAVE+8*5(%rsp), %r13 + mov _GPR_SAVE+8*6(%rsp), %r14 + mov _GPR_SAVE+8*7(%rsp), %r15 + mov %r10, %rsp + + ret + +return_null: + xor job_rax, job_rax + jmp return +ENDPROC(sha1_mb_mgr_flush_avx2) + + +################################################################# + +.align 16 +ENTRY(sha1_mb_mgr_get_comp_job_avx2) + push %rbx + + ## if bit 32+3 is set, then all lanes are empty + mov _unused_lanes(state), unused_lanes + bt $(32+3), unused_lanes + jc .return_null + + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword + + vmovd %xmm2, DWORD_idx + test $~0xF, idx + jnz .return_null + + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + mov _unused_lanes(state), unused_lanes + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 + movl _args_digest+4*32(state, idx, 4), tmp2_w + + vmovdqu %xmm0, _result_digest(job_rax) + movl tmp2_w, _result_digest+1*16(job_rax) + + pop %rbx + + ret + +.return_null: + xor job_rax, job_rax + pop %rbx + ret +ENDPROC(sha1_mb_mgr_get_comp_job_avx2) + +.data + +.align 16 +clear_low_nibble: +.octa 0x000000000000000000000000FFFFFFF0 +one: +.quad 1 +two: +.quad 2 +three: +.quad 3 +four: +.quad 4 +five: +.quad 5 +six: +.quad 6 +seven: +.quad 7 diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c new file mode 100644 index 00000000000000..4ca7e166a2aa3b --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c @@ -0,0 +1,64 @@ +/* + * Initialization code for multi buffer SHA1 algorithm for AVX2 + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "sha_mb_mgr.h" + +void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) +{ + unsigned int j; + state->unused_lanes = 0xF76543210; + for (j = 0; j < 8; j++) { + state->lens[j] = 0xFFFFFFFF; + state->ldata[j].job_in_lane = NULL; + } +} diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S new file mode 100644 index 00000000000000..2ab9560b53c84d --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S @@ -0,0 +1,228 @@ +/* + * Buffer submit code for multi buffer SHA1 algorithm + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "sha1_mb_mgr_datastruct.S" + + +.extern sha1_x8_avx + +# LINUX register definitions +arg1 = %rdi +arg2 = %rsi +size_offset = %rcx +tmp2 = %rcx +extra_blocks = %rdx + +# Common definitions +#define state arg1 +#define job %rsi +#define len2 arg2 +#define p2 arg2 + +# idx must be a register not clobberred by sha1_x8_avx2 +idx = %r8 +DWORD_idx = %r8d +last_len = %r8 + +p = %r11 +start_offset = %r11 + +unused_lanes = %rbx +BYTE_unused_lanes = %bl + +job_rax = %rax +len = %rax +DWORD_len = %eax + +lane = %rbp +tmp3 = %rbp + +tmp = %r9 +DWORD_tmp = %r9d + +lane_data = %r10 + +# STACK_SPACE needs to be an odd multiple of 8 +STACK_SPACE = 8*8 + 16*10 + 8 + +# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) +# arg 1 : rcx : state +# arg 2 : rdx : job +ENTRY(sha1_mb_mgr_submit_avx2) + + mov %rsp, %r10 + sub $STACK_SPACE, %rsp + and $~31, %rsp + + mov %rbx, (%rsp) + mov %r10, 8*2(%rsp) #save old rsp + mov %rbp, 8*3(%rsp) + mov %r12, 8*4(%rsp) + mov %r13, 8*5(%rsp) + mov %r14, 8*6(%rsp) + mov %r15, 8*7(%rsp) + + mov _unused_lanes(state), unused_lanes + mov unused_lanes, lane + and $0xF, lane + shr $4, unused_lanes + imul $_LANE_DATA_size, lane, lane_data + movl $STS_BEING_PROCESSED, _status(job) + lea _ldata(state, lane_data), lane_data + mov unused_lanes, _unused_lanes(state) + movl _len(job), DWORD_len + + mov job, _job_in_lane(lane_data) + shl $4, len + or lane, len + + movl DWORD_len, _lens(state , lane, 4) + + # Load digest words from result_digest + vmovdqu _result_digest(job), %xmm0 + mov _result_digest+1*16(job), DWORD_tmp + vmovd %xmm0, _args_digest(state, lane, 4) + vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) + vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) + vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) + movl DWORD_tmp, _args_digest+4*32(state , lane, 4) + + mov _buffer(job), p + mov p, _args_data_ptr(state, lane, 8) + + cmp $0xF, unused_lanes + jne return_null + +start_loop: + # Find min length + vmovdqa _lens(state), %xmm0 + vmovdqa _lens+1*16(state), %xmm1 + + vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} + vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} + vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} + vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword + + vmovd %xmm2, DWORD_idx + mov idx, len2 + and $0xF, idx + shr $4, len2 + jz len_is_0 + + vpand clear_low_nibble(%rip), %xmm2, %xmm2 + vpshufd $0, %xmm2, %xmm2 + + vpsubd %xmm2, %xmm0, %xmm0 + vpsubd %xmm2, %xmm1, %xmm1 + + vmovdqa %xmm0, _lens + 0*16(state) + vmovdqa %xmm1, _lens + 1*16(state) + + + # "state" and "args" are the same address, arg1 + # len is arg2 + call sha1_x8_avx2 + + # state and idx are intact + +len_is_0: + # process completed job "idx" + imul $_LANE_DATA_size, idx, lane_data + lea _ldata(state, lane_data), lane_data + + mov _job_in_lane(lane_data), job_rax + mov _unused_lanes(state), unused_lanes + movq $0, _job_in_lane(lane_data) + movl $STS_COMPLETED, _status(job_rax) + shl $4, unused_lanes + or idx, unused_lanes + mov unused_lanes, _unused_lanes(state) + + movl $0xFFFFFFFF, _lens(state, idx, 4) + + vmovd _args_digest(state, idx, 4), %xmm0 + vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 + vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 + movl 4*32(state, idx, 4), DWORD_tmp + + vmovdqu %xmm0, _result_digest(job_rax) + movl DWORD_tmp, _result_digest+1*16(job_rax) + +return: + + mov (%rsp), %rbx + mov 8*2(%rsp), %r10 #save old rsp + mov 8*3(%rsp), %rbp + mov 8*4(%rsp), %r12 + mov 8*5(%rsp), %r13 + mov 8*6(%rsp), %r14 + mov 8*7(%rsp), %r15 + mov %r10, %rsp + + ret + +return_null: + xor job_rax, job_rax + jmp return + +ENDPROC(sha1_mb_mgr_submit_avx2) + +.data + +.align 16 +clear_low_nibble: + .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S new file mode 100644 index 00000000000000..8e1b47792b319c --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha1_x8_avx2.S @@ -0,0 +1,472 @@ +/* + * Multi-buffer SHA1 algorithm hash compute routine + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "sha1_mb_mgr_datastruct.S" + +## code to compute oct SHA1 using SSE-256 +## outer calling routine takes care of save and restore of XMM registers + +## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15 +## +## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 +## Linux preserves: rdi rbp r8 +## +## clobbers ymm0-15 + + +# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 +# "transpose" data in {r0...r7} using temps {t0...t1} +# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} +# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} +# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} +# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} +# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} +# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} +# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} +# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} +# +# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} +# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} +# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} +# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} +# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} +# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} +# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} +# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} +# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} +# + +.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 + # process top half (r0..r3) {a...d} + vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} + vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} + vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} + vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} + vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} + vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} + vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} + vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} + + # use r2 in place of t0 + # process bottom half (r4..r7) {e...h} + vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} + vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} + vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} + vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} + vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} + vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} + vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} + vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} + + vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 + vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 + vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 + vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 + vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 + vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 + vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 + vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 + +.endm +## +## Magic functions defined in FIPS 180-1 +## +# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D))) +.macro MAGIC_F0 regF regB regC regD regT + vpxor \regD, \regC, \regF + vpand \regB, \regF, \regF + vpxor \regD, \regF, \regF +.endm + +# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D) +.macro MAGIC_F1 regF regB regC regD regT + vpxor \regC, \regD, \regF + vpxor \regB, \regF, \regF +.endm + +# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D)) +.macro MAGIC_F2 regF regB regC regD regT + vpor \regC, \regB, \regF + vpand \regC, \regB, \regT + vpand \regD, \regF, \regF + vpor \regT, \regF, \regF +.endm + +# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D) +.macro MAGIC_F3 regF regB regC regD regT + MAGIC_F1 \regF,\regB,\regC,\regD,\regT +.endm + +# PROLD reg, imm, tmp +.macro PROLD reg imm tmp + vpsrld $(32-\imm), \reg, \tmp + vpslld $\imm, \reg, \reg + vpor \tmp, \reg, \reg +.endm + +.macro PROLD_nd reg imm tmp src + vpsrld $(32-\imm), \src, \tmp + vpslld $\imm, \src, \reg + vpor \tmp, \reg, \reg +.endm + +.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC + vpaddd \immCNT, \regE, \regE + vpaddd \memW*32(%rsp), \regE, \regE + PROLD_nd \regT, 5, \regF, \regA + vpaddd \regT, \regE, \regE + \MAGIC \regF, \regB, \regC, \regD, \regT + PROLD \regB, 30, \regT + vpaddd \regF, \regE, \regE +.endm + +.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC + vpaddd \immCNT, \regE, \regE + offset = ((\memW - 14) & 15) * 32 + vmovdqu offset(%rsp), W14 + vpxor W14, W16, W16 + offset = ((\memW - 8) & 15) * 32 + vpxor offset(%rsp), W16, W16 + offset = ((\memW - 3) & 15) * 32 + vpxor offset(%rsp), W16, W16 + vpsrld $(32-1), W16, \regF + vpslld $1, W16, W16 + vpor W16, \regF, \regF + + ROTATE_W + + offset = ((\memW - 0) & 15) * 32 + vmovdqu \regF, offset(%rsp) + vpaddd \regF, \regE, \regE + PROLD_nd \regT, 5, \regF, \regA + vpaddd \regT, \regE, \regE + \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D) + PROLD \regB,30, \regT + vpaddd \regF, \regE, \regE +.endm + +######################################################################## +######################################################################## +######################################################################## + +## FRAMESZ plus pushes must be an odd multiple of 8 +YMM_SAVE = (15-15)*32 +FRAMESZ = 32*16 + YMM_SAVE +_YMM = FRAMESZ - YMM_SAVE + +#define VMOVPS vmovups + +IDX = %rax +inp0 = %r9 +inp1 = %r10 +inp2 = %r11 +inp3 = %r12 +inp4 = %r13 +inp5 = %r14 +inp6 = %r15 +inp7 = %rcx +arg1 = %rdi +arg2 = %rsi +RSP_SAVE = %rdx + +# ymm0 A +# ymm1 B +# ymm2 C +# ymm3 D +# ymm4 E +# ymm5 F AA +# ymm6 T0 BB +# ymm7 T1 CC +# ymm8 T2 DD +# ymm9 T3 EE +# ymm10 T4 TMP +# ymm11 T5 FUN +# ymm12 T6 K +# ymm13 T7 W14 +# ymm14 T8 W15 +# ymm15 T9 W16 + + +A = %ymm0 +B = %ymm1 +C = %ymm2 +D = %ymm3 +E = %ymm4 +F = %ymm5 +T0 = %ymm6 +T1 = %ymm7 +T2 = %ymm8 +T3 = %ymm9 +T4 = %ymm10 +T5 = %ymm11 +T6 = %ymm12 +T7 = %ymm13 +T8 = %ymm14 +T9 = %ymm15 + +AA = %ymm5 +BB = %ymm6 +CC = %ymm7 +DD = %ymm8 +EE = %ymm9 +TMP = %ymm10 +FUN = %ymm11 +K = %ymm12 +W14 = %ymm13 +W15 = %ymm14 +W16 = %ymm15 + +.macro ROTATE_ARGS + TMP_ = E + E = D + D = C + C = B + B = A + A = TMP_ +.endm + +.macro ROTATE_W +TMP_ = W16 +W16 = W15 +W15 = W14 +W14 = TMP_ +.endm + +# 8 streams x 5 32bit words per digest x 4 bytes per word +#define DIGEST_SIZE (8*5*4) + +.align 32 + +# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size) +# arg 1 : pointer to array[4] of pointer to input data +# arg 2 : size (in blocks) ;; assumed to be >= 1 +# +ENTRY(sha1_x8_avx2) + + push RSP_SAVE + + #save rsp + mov %rsp, RSP_SAVE + sub $FRAMESZ, %rsp + + #align rsp to 32 Bytes + and $~0x1F, %rsp + + ## Initialize digests + vmovdqu 0*32(arg1), A + vmovdqu 1*32(arg1), B + vmovdqu 2*32(arg1), C + vmovdqu 3*32(arg1), D + vmovdqu 4*32(arg1), E + + ## transpose input onto stack + mov _data_ptr+0*8(arg1),inp0 + mov _data_ptr+1*8(arg1),inp1 + mov _data_ptr+2*8(arg1),inp2 + mov _data_ptr+3*8(arg1),inp3 + mov _data_ptr+4*8(arg1),inp4 + mov _data_ptr+5*8(arg1),inp5 + mov _data_ptr+6*8(arg1),inp6 + mov _data_ptr+7*8(arg1),inp7 + + xor IDX, IDX +lloop: + vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F + I=0 +.rep 2 + VMOVPS (inp0, IDX), T0 + VMOVPS (inp1, IDX), T1 + VMOVPS (inp2, IDX), T2 + VMOVPS (inp3, IDX), T3 + VMOVPS (inp4, IDX), T4 + VMOVPS (inp5, IDX), T5 + VMOVPS (inp6, IDX), T6 + VMOVPS (inp7, IDX), T7 + + TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + vpshufb F, T0, T0 + vmovdqu T0, (I*8)*32(%rsp) + vpshufb F, T1, T1 + vmovdqu T1, (I*8+1)*32(%rsp) + vpshufb F, T2, T2 + vmovdqu T2, (I*8+2)*32(%rsp) + vpshufb F, T3, T3 + vmovdqu T3, (I*8+3)*32(%rsp) + vpshufb F, T4, T4 + vmovdqu T4, (I*8+4)*32(%rsp) + vpshufb F, T5, T5 + vmovdqu T5, (I*8+5)*32(%rsp) + vpshufb F, T6, T6 + vmovdqu T6, (I*8+6)*32(%rsp) + vpshufb F, T7, T7 + vmovdqu T7, (I*8+7)*32(%rsp) + add $32, IDX + I = (I+1) +.endr + # save old digests + vmovdqu A,AA + vmovdqu B,BB + vmovdqu C,CC + vmovdqu D,DD + vmovdqu E,EE + +## +## perform 0-79 steps +## + vmovdqu K00_19(%rip), K +## do rounds 0...15 + I = 0 +.rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 16...19 + vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16 + vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15 +.rep 4 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 20...39 + vmovdqu K20_39(%rip), K +.rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 40...59 + vmovdqu K40_59(%rip), K +.rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2 + ROTATE_ARGS + I = (I+1) +.endr + +## do rounds 60...79 + vmovdqu K60_79(%rip), K +.rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3 + ROTATE_ARGS + I = (I+1) +.endr + + vpaddd AA,A,A + vpaddd BB,B,B + vpaddd CC,C,C + vpaddd DD,D,D + vpaddd EE,E,E + + sub $1, arg2 + jne lloop + + # write out digests + vmovdqu A, 0*32(arg1) + vmovdqu B, 1*32(arg1) + vmovdqu C, 2*32(arg1) + vmovdqu D, 3*32(arg1) + vmovdqu E, 4*32(arg1) + + # update input pointers + add IDX, inp0 + add IDX, inp1 + add IDX, inp2 + add IDX, inp3 + add IDX, inp4 + add IDX, inp5 + add IDX, inp6 + add IDX, inp7 + mov inp0, _data_ptr (arg1) + mov inp1, _data_ptr + 1*8(arg1) + mov inp2, _data_ptr + 2*8(arg1) + mov inp3, _data_ptr + 3*8(arg1) + mov inp4, _data_ptr + 4*8(arg1) + mov inp5, _data_ptr + 5*8(arg1) + mov inp6, _data_ptr + 6*8(arg1) + mov inp7, _data_ptr + 7*8(arg1) + + ################ + ## Postamble + + mov RSP_SAVE, %rsp + pop RSP_SAVE + + ret +ENDPROC(sha1_x8_avx2) + + +.data + +.align 32 +K00_19: +.octa 0x5A8279995A8279995A8279995A827999 +.octa 0x5A8279995A8279995A8279995A827999 +K20_39: +.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 +.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 +K40_59: +.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC +.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC +K60_79: +.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 +.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 +PSHUFFLE_BYTE_FLIP_MASK: +.octa 0x0c0d0e0f08090a0b0405060700010203 +.octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha-mb/sha_mb_ctx.h b/arch/x86/crypto/sha-mb/sha_mb_ctx.h new file mode 100644 index 00000000000000..e36069d0c1bdbb --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha_mb_ctx.h @@ -0,0 +1,136 @@ +/* + * Header file for multi buffer SHA context + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SHA_MB_CTX_INTERNAL_H +#define _SHA_MB_CTX_INTERNAL_H + +#include "sha_mb_mgr.h" + +#define HASH_UPDATE 0x00 +#define HASH_FIRST 0x01 +#define HASH_LAST 0x02 +#define HASH_ENTIRE 0x03 +#define HASH_DONE 0x04 +#define HASH_FINAL 0x08 + +#define HASH_CTX_STS_IDLE 0x00 +#define HASH_CTX_STS_PROCESSING 0x01 +#define HASH_CTX_STS_LAST 0x02 +#define HASH_CTX_STS_COMPLETE 0x04 + +enum hash_ctx_error { + HASH_CTX_ERROR_NONE = 0, + HASH_CTX_ERROR_INVALID_FLAGS = -1, + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, + +#ifdef HASH_CTX_DEBUG + HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, +#endif +}; + + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while (0) + + +/* Hash Constants and Typedefs */ +#define SHA1_DIGEST_LENGTH 5 +#define SHA1_LOG2_BLOCK_SIZE 6 + +#define SHA1_PADLENGTHFIELD_SIZE 8 + +#ifdef SHA_MB_DEBUG +#define assert(expr) \ +do { \ + if (unlikely(!(expr))) { \ + printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) +#else +#define assert(expr) do {} while (0) +#endif + +struct sha1_ctx_mgr { + struct sha1_mb_mgr mgr; +}; + +/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */ + +struct sha1_hash_ctx { + /* Must be at struct offset 0 */ + struct job_sha1 job; + /* status flag */ + int status; + /* error flag */ + int error; + + uint32_t total_length; + const void *incoming_buffer; + uint32_t incoming_buffer_length; + uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; + uint32_t partial_block_buffer_length; + void *user_data; +}; + +#endif diff --git a/arch/x86/crypto/sha-mb/sha_mb_mgr.h b/arch/x86/crypto/sha-mb/sha_mb_mgr.h new file mode 100644 index 00000000000000..08ad1a9acfd727 --- /dev/null +++ b/arch/x86/crypto/sha-mb/sha_mb_mgr.h @@ -0,0 +1,110 @@ +/* + * Header file for multi buffer SHA1 algorithm manager + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * James Guilford + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __SHA_MB_MGR_H +#define __SHA_MB_MGR_H + + +#include + +#define NUM_SHA1_DIGEST_WORDS 5 + +enum job_sts { STS_UNKNOWN = 0, + STS_BEING_PROCESSED = 1, + STS_COMPLETED = 2, + STS_INTERNAL_ERROR = 3, + STS_ERROR = 4 +}; + +struct job_sha1 { + u8 *buffer; + u32 len; + u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32); + enum job_sts status; + void *user_data; +}; + +/* SHA1 out-of-order scheduler */ + +/* typedef uint32_t sha1_digest_array[5][8]; */ + +struct sha1_args_x8 { + uint32_t digest[5][8]; + uint8_t *data_ptr[8]; +}; + +struct sha1_lane_data { + struct job_sha1 *job_in_lane; +}; + +struct sha1_mb_mgr { + struct sha1_args_x8 args; + + uint32_t lens[8]; + + /* each byte is index (0...7) of unused lanes */ + uint64_t unused_lanes; + /* byte 4 is set to FF as a flag */ + struct sha1_lane_data ldata[8]; +}; + + +#define SHA1_MB_MGR_NUM_LANES_AVX2 8 + +void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state); +struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state, + struct job_sha1 *job); +struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state); +struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state); + +#endif diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S new file mode 100644 index 00000000000000..1cd792db15efe7 --- /dev/null +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -0,0 +1,708 @@ +/* + * Implement fast SHA-1 with AVX2 instructions. (x86_64) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Ilya Albrekht + * Maxim Locktyukhin + * Ronen Zohar + * Chandramouli Narayanan + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * SHA-1 implementation with Intel(R) AVX2 instruction set extensions. + * + *This implementation is based on the previous SSSE3 release: + *Visit http://software.intel.com/en-us/articles/ + *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ + * + *Updates 20-byte SHA-1 record in 'hash' for even number of + *'num_blocks' consecutive 64-byte blocks + * + *extern "C" void sha1_transform_avx2( + * int *hash, const char* input, size_t num_blocks ); + */ + +#include + +#define CTX %rdi /* arg1 */ +#define BUF %rsi /* arg2 */ +#define CNT %rdx /* arg3 */ + +#define REG_A %ecx +#define REG_B %esi +#define REG_C %edi +#define REG_D %eax +#define REG_E %edx +#define REG_TB %ebx +#define REG_TA %r12d +#define REG_RA %rcx +#define REG_RB %rsi +#define REG_RC %rdi +#define REG_RD %rax +#define REG_RE %rdx +#define REG_RTA %r12 +#define REG_RTB %rbx +#define REG_T1 %ebp +#define xmm_mov vmovups +#define avx2_zeroupper vzeroupper +#define RND_F1 1 +#define RND_F2 2 +#define RND_F3 3 + +.macro REGALLOC + .set A, REG_A + .set B, REG_B + .set C, REG_C + .set D, REG_D + .set E, REG_E + .set TB, REG_TB + .set TA, REG_TA + + .set RA, REG_RA + .set RB, REG_RB + .set RC, REG_RC + .set RD, REG_RD + .set RE, REG_RE + + .set RTA, REG_RTA + .set RTB, REG_RTB + + .set T1, REG_T1 +.endm + +#define K_BASE %r8 +#define HASH_PTR %r9 +#define BUFFER_PTR %r10 +#define BUFFER_PTR2 %r13 +#define BUFFER_END %r11 + +#define PRECALC_BUF %r14 +#define WK_BUF %r15 + +#define W_TMP %xmm0 +#define WY_TMP %ymm0 +#define WY_TMP2 %ymm9 + +# AVX2 variables +#define WY0 %ymm3 +#define WY4 %ymm5 +#define WY08 %ymm7 +#define WY12 %ymm8 +#define WY16 %ymm12 +#define WY20 %ymm13 +#define WY24 %ymm14 +#define WY28 %ymm15 + +#define YMM_SHUFB_BSWAP %ymm10 + +/* + * Keep 2 iterations precalculated at a time: + * - 80 DWORDs per iteration * 2 + */ +#define W_SIZE (80*2*2 +16) + +#define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF) +#define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF) + + +.macro UPDATE_HASH hash, val + add \hash, \val + mov \val, \hash +.endm + +.macro PRECALC_RESET_WY + .set WY_00, WY0 + .set WY_04, WY4 + .set WY_08, WY08 + .set WY_12, WY12 + .set WY_16, WY16 + .set WY_20, WY20 + .set WY_24, WY24 + .set WY_28, WY28 + .set WY_32, WY_00 +.endm + +.macro PRECALC_ROTATE_WY + /* Rotate macros */ + .set WY_32, WY_28 + .set WY_28, WY_24 + .set WY_24, WY_20 + .set WY_20, WY_16 + .set WY_16, WY_12 + .set WY_12, WY_08 + .set WY_08, WY_04 + .set WY_04, WY_00 + .set WY_00, WY_32 + + /* Define register aliases */ + .set WY, WY_00 + .set WY_minus_04, WY_04 + .set WY_minus_08, WY_08 + .set WY_minus_12, WY_12 + .set WY_minus_16, WY_16 + .set WY_minus_20, WY_20 + .set WY_minus_24, WY_24 + .set WY_minus_28, WY_28 + .set WY_minus_32, WY +.endm + +.macro PRECALC_00_15 + .if (i == 0) # Initialize and rotate registers + PRECALC_RESET_WY + PRECALC_ROTATE_WY + .endif + + /* message scheduling pre-compute for rounds 0-15 */ + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP + .elseif ((i & 7) == 1) + vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ + WY_TMP, WY_TMP + .elseif ((i & 7) == 2) + vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY + .elseif ((i & 7) == 4) + vpaddd K_XMM(K_BASE), WY, WY_TMP + .elseif ((i & 7) == 7) + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC_16_31 + /* + * message scheduling pre-compute for rounds 16-31 + * calculating last 32 w[i] values in 8 XMM registers + * pre-calculate K+w[i] values and store to mem + * for later load by ALU add instruction + * + * "brute force" vectorization for rounds 16-31 only + * due to w[i]->w[i-3] dependency + */ + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + /* w[i-14] */ + vpalignr $8, WY_minus_16, WY_minus_12, WY + vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */ + .elseif ((i & 7) == 1) + vpxor WY_minus_08, WY, WY + vpxor WY_minus_16, WY_TMP, WY_TMP + .elseif ((i & 7) == 2) + vpxor WY_TMP, WY, WY + vpslldq $12, WY, WY_TMP2 + .elseif ((i & 7) == 3) + vpslld $1, WY, WY_TMP + vpsrld $31, WY, WY + .elseif ((i & 7) == 4) + vpor WY, WY_TMP, WY_TMP + vpslld $2, WY_TMP2, WY + .elseif ((i & 7) == 5) + vpsrld $30, WY_TMP2, WY_TMP2 + vpxor WY, WY_TMP, WY_TMP + .elseif ((i & 7) == 7) + vpxor WY_TMP2, WY_TMP, WY + vpaddd K_XMM(K_BASE), WY, WY_TMP + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC_32_79 + /* + * in SHA-1 specification: + * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 + * instead we do equal: + * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 + * allows more efficient vectorization + * since w[i]=>w[i-3] dependency is broken + */ + + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP + .elseif ((i & 7) == 1) + /* W is W_minus_32 before xor */ + vpxor WY_minus_28, WY, WY + .elseif ((i & 7) == 2) + vpxor WY_minus_16, WY_TMP, WY_TMP + .elseif ((i & 7) == 3) + vpxor WY_TMP, WY, WY + .elseif ((i & 7) == 4) + vpslld $2, WY, WY_TMP + .elseif ((i & 7) == 5) + vpsrld $30, WY, WY + vpor WY, WY_TMP, WY + .elseif ((i & 7) == 7) + vpaddd K_XMM(K_BASE), WY, WY_TMP + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC r, s + .set i, \r + + .if (i < 40) + .set K_XMM, 32*0 + .elseif (i < 80) + .set K_XMM, 32*1 + .elseif (i < 120) + .set K_XMM, 32*2 + .else + .set K_XMM, 32*3 + .endif + + .if (i<32) + PRECALC_00_15 \s + .elseif (i<64) + PRECALC_16_31 \s + .elseif (i < 160) + PRECALC_32_79 \s + .endif +.endm + +.macro ROTATE_STATE + .set T_REG, E + .set E, D + .set D, C + .set C, B + .set B, TB + .set TB, A + .set A, T_REG + + .set T_REG, RE + .set RE, RD + .set RD, RC + .set RC, RB + .set RB, RTB + .set RTB, RA + .set RA, T_REG +.endm + +/* Macro relies on saved ROUND_Fx */ + +.macro RND_FUN f, r + .if (\f == RND_F1) + ROUND_F1 \r + .elseif (\f == RND_F2) + ROUND_F2 \r + .elseif (\f == RND_F3) + ROUND_F3 \r + .endif +.endm + +.macro RR r + .set round_id, (\r % 80) + + .if (round_id == 0) /* Precalculate F for first round */ + .set ROUND_FUNC, RND_F1 + mov B, TB + + rorx $(32-30), B, B /* b>>>2 */ + andn D, TB, T1 + and C, TB + xor T1, TB + .endif + + RND_FUN ROUND_FUNC, \r + ROTATE_STATE + + .if (round_id == 18) + .set ROUND_FUNC, RND_F2 + .elseif (round_id == 38) + .set ROUND_FUNC, RND_F3 + .elseif (round_id == 58) + .set ROUND_FUNC, RND_F2 + .endif + + .set round_id, ( (\r+1) % 80) + + RND_FUN ROUND_FUNC, (\r+1) + ROTATE_STATE +.endm + +.macro ROUND_F1 r + add WK(\r), E + + andn C, A, T1 /* ~b&d */ + lea (RE,RTB), E /* Add F from the previous round */ + + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + rorx $(32-30),A, TB /* b>>>2 for next round */ + + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + /* + * Calculate F for the next round + * (b & c) ^ andn[b, d] + */ + and B, A /* b&c */ + xor T1, A /* F1 = (b&c) ^ (~b&d) */ + + lea (RE,RTA), E /* E += A >>> 5 */ +.endm + +.macro ROUND_F2 r + add WK(\r), E + lea (RE,RTB), E /* Add F from the previous round */ + + /* Calculate F for the next round */ + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + .if ((round_id) < 79) + rorx $(32-30), A, TB /* b>>>2 for next round */ + .endif + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + .if ((round_id) < 79) + xor B, A + .endif + + add TA, E /* E += A >>> 5 */ + + .if ((round_id) < 79) + xor C, A + .endif +.endm + +.macro ROUND_F3 r + add WK(\r), E + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + lea (RE,RTB), E /* Add F from the previous round */ + + mov B, T1 + or A, T1 + + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + rorx $(32-30), A, TB /* b>>>2 for next round */ + + /* Calculate F for the next round + * (b and c) or (d and (b or c)) + */ + and C, T1 + and B, A + or T1, A + + add TA, E /* E += A >>> 5 */ + +.endm + +/* + * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining + */ +.macro SHA1_PIPELINED_MAIN_BODY + + REGALLOC + + mov (HASH_PTR), A + mov 4(HASH_PTR), B + mov 8(HASH_PTR), C + mov 12(HASH_PTR), D + mov 16(HASH_PTR), E + + mov %rsp, PRECALC_BUF + lea (2*4*80+32)(%rsp), WK_BUF + + # Precalc WK for first 2 blocks + PRECALC_OFFSET = 0 + .set i, 0 + .rept 160 + PRECALC i + .set i, i + 1 + .endr + PRECALC_OFFSET = 128 + xchg WK_BUF, PRECALC_BUF + + .align 32 +_loop: + /* + * code loops through more than one block + * we use K_BASE value as a signal of a last block, + * it is set below by: cmovae BUFFER_PTR, K_BASE + */ + cmp K_BASE, BUFFER_PTR + jne _begin + .align 32 + jmp _end + .align 32 +_begin: + + /* + * Do first block + * rounds: 0,2,4,6,8 + */ + .set j, 0 + .rept 5 + RR j + .set j, j+2 + .endr + + jmp _loop0 +_loop0: + + /* + * rounds: + * 10,12,14,16,18 + * 20,22,24,26,28 + * 30,32,34,36,38 + * 40,42,44,46,48 + * 50,52,54,56,58 + */ + .rept 25 + RR j + .set j, j+2 + .endr + + add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ + cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ + cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + + /* + * rounds + * 60,62,64,66,68 + * 70,72,74,76,78 + */ + .rept 10 + RR j + .set j, j+2 + .endr + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), TB + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + cmp K_BASE, BUFFER_PTR /* is current block the last one? */ + je _loop + + mov TB, B + + /* Process second block */ + /* + * rounds + * 0+80, 2+80, 4+80, 6+80, 8+80 + * 10+80,12+80,14+80,16+80,18+80 + */ + + .set j, 0 + .rept 10 + RR j+80 + .set j, j+2 + .endr + + jmp _loop1 +_loop1: + /* + * rounds + * 20+80,22+80,24+80,26+80,28+80 + * 30+80,32+80,34+80,36+80,38+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + jmp _loop2 +_loop2: + + /* + * rounds + * 40+80,42+80,44+80,46+80,48+80 + * 50+80,52+80,54+80,56+80,58+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ + + cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */ + cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + + jmp _loop3 +_loop3: + + /* + * rounds + * 60+80,62+80,64+80,66+80,68+80 + * 70+80,72+80,74+80,76+80,78+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), TB + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + /* Reset state for AVX2 reg permutation */ + mov A, TA + mov TB, A + mov C, TB + mov E, C + mov D, B + mov TA, D + + REGALLOC + + xchg WK_BUF, PRECALC_BUF + + jmp _loop + + .align 32 + _end: + +.endm +/* + * macro implements SHA-1 function's body for several 64-byte blocks + * param: function's name + */ +.macro SHA1_VECTOR_ASM name + ENTRY(\name) + + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + RESERVE_STACK = (W_SIZE*4 + 8+24) + + /* Align stack */ + mov %rsp, %rbx + and $~(0x20-1), %rsp + push %rbx + sub $RESERVE_STACK, %rsp + + avx2_zeroupper + + lea K_XMM_AR(%rip), K_BASE + + mov CTX, HASH_PTR + mov BUF, BUFFER_PTR + lea 64(BUF), BUFFER_PTR2 + + shl $6, CNT /* mul by 64 */ + add BUF, CNT + add $64, CNT + mov CNT, BUFFER_END + + cmp BUFFER_END, BUFFER_PTR2 + cmovae K_BASE, BUFFER_PTR2 + + xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP + + SHA1_PIPELINED_MAIN_BODY + + avx2_zeroupper + + add $RESERVE_STACK, %rsp + pop %rsp + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + + ret + + ENDPROC(\name) +.endm + +.section .rodata + +#define K1 0x5a827999 +#define K2 0x6ed9eba1 +#define K3 0x8f1bbcdc +#define K4 0xca62c1d6 + +.align 128 +K_XMM_AR: + .long K1, K1, K1, K1 + .long K1, K1, K1, K1 + .long K2, K2, K2, K2 + .long K2, K2, K2, K2 + .long K3, K3, K3, K3 + .long K3, K3, K3, K3 + .long K4, K4, K4, K4 + .long K4, K4, K4, K4 + +BSWAP_SHUFB_CTL: + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f +.text + +SHA1_VECTOR_ASM sha1_transform_avx2 diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4a11a9d7245162..74d16ef707c790 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -10,6 +10,7 @@ * Copyright (c) Andrew McDonald * Copyright (c) Jean-Francois Dive * Copyright (c) Mathias Krause + * Copyright (c) Chandramouli Narayanan * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, asmlinkage void sha1_transform_avx(u32 *digest, const char *data, unsigned int rounds); #endif +#ifdef CONFIG_AS_AVX2 +#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ + +asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, + unsigned int rounds); +#endif static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); @@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in) return 0; } +#ifdef CONFIG_AS_AVX2 +static void sha1_apply_transform_avx2(u32 *digest, const char *data, + unsigned int rounds) +{ + /* Select the optimal transform based on data block size */ + if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(digest, data, rounds); + else + sha1_transform_avx(digest, data, rounds); +} +#endif + static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_ssse3_init, @@ -201,27 +220,49 @@ static bool __init avx_usable(void) return true; } + +#ifdef CONFIG_AS_AVX2 +static bool __init avx2_usable(void) +{ + if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} +#endif #endif static int __init sha1_ssse3_mod_init(void) { + char *algo_name; + /* test for SSSE3 first */ - if (cpu_has_ssse3) + if (cpu_has_ssse3) { sha1_transform_asm = sha1_transform_ssse3; + algo_name = "SSSE3"; + } #ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) + if (avx_usable()) { sha1_transform_asm = sha1_transform_avx; + algo_name = "AVX"; +#ifdef CONFIG_AS_AVX2 + /* allow AVX2 to override AVX, it's a little faster */ + if (avx2_usable()) { + sha1_transform_asm = sha1_apply_transform_avx2; + algo_name = "AVX2"; + } +#endif + } #endif if (sha1_transform_asm) { - pr_info("Using %s optimized SHA-1 implementation\n", - sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3" - : "AVX"); + pr_info("Using %s optimized SHA-1 implementation\n", algo_name); return crypto_register_shash(&alg); } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 597d4da696561a..b14786da4a1fc6 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -235,7 +235,7 @@ static int __init sha256_ssse3_mod_init(void) /* allow AVX to override SSSE3, it's a little faster */ if (avx_usable()) { #ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2)) + if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) sha256_transform_asm = sha256_transform_rorx; else #endif diff --git a/crypto/Kconfig b/crypto/Kconfig index dc7650c45972dc..6822291bb90738 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -158,6 +158,20 @@ config CRYPTO_CRYPTD converts an arbitrary synchronous software crypto algorithm into an asynchronous algorithm that executes in a kernel thread. +config CRYPTO_MCRYPTD + tristate "Software async multi-buffer crypto daemon" + select CRYPTO_BLKCIPHER + select CRYPTO_HASH + select CRYPTO_MANAGER + select CRYPTO_WORKQUEUE + help + This is a generic software asynchronous crypto daemon that + provides the kernel thread to assist multi-buffer crypto + algorithms for submitting jobs and flushing jobs in multi-buffer + crypto algorithms. Multi-buffer crypto algorithms are executed + in the context of this kernel thread and drivers can post + their crypto request asyncrhously and process by this daemon. + config CRYPTO_AUTHENC tristate "Authenc support" select CRYPTO_AEAD @@ -493,14 +507,14 @@ config CRYPTO_SHA1 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). config CRYPTO_SHA1_SSSE3 - tristate "SHA1 digest algorithm (SSSE3/AVX)" + tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)" depends on X86 && 64BIT select CRYPTO_SHA1 select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using Supplemental SSE3 (SSSE3) instructions or Advanced Vector - Extensions (AVX), when available. + Extensions (AVX/AVX2), when available. config CRYPTO_SHA256_SSSE3 tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)" @@ -549,6 +563,22 @@ config CRYPTO_SHA1_PPC This is the powerpc hardware accelerated implementation of the SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). +config CRYPTO_SHA1_MB + tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)" + depends on X86 && 64BIT + select CRYPTO_SHA1 + select CRYPTO_HASH + select CRYPTO_MCRYPTD + help + SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented + using multi-buffer technique. This algorithm computes on + multiple data lanes concurrently with SIMD instructions for + better throughput. It should not be enabled by default but + used when there is significant amount of work to keep the keep + the data lanes filled to get performance benefit. If the data + lanes remain unfilled, a flush operation will be initiated to + process the crypto jobs, adding a slight latency. + config CRYPTO_SHA256 tristate "SHA224 and SHA256 digest algorithm" select CRYPTO_HASH diff --git a/crypto/Makefile b/crypto/Makefile index d1f1d390d41eac..ce40738cc8a4fa 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_CRYPTO_GCM) += gcm.o obj-$(CONFIG_CRYPTO_CCM) += ccm.o obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o +obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o obj-$(CONFIG_CRYPTO_DES) += des_generic.o obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o diff --git a/crypto/ahash.c b/crypto/ahash.c index 793a27f2493e52..b6a7df746f43a1 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,10 @@ static int hash_walk_next(struct crypto_hash_walk *walk) unsigned int nbytes = min(walk->entrylen, ((unsigned int)(PAGE_SIZE)) - offset); - walk->data = kmap_atomic(walk->pg); + if (walk->flags & CRYPTO_ALG_ASYNC) + walk->data = kmap(walk->pg); + else + walk->data = kmap_atomic(walk->pg); walk->data += offset; if (offset & alignmask) { @@ -93,8 +97,16 @@ int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) return nbytes; } - kunmap_atomic(walk->data); - crypto_yield(walk->flags); + if (walk->flags & CRYPTO_ALG_ASYNC) + kunmap(walk->pg); + else { + kunmap_atomic(walk->data); + /* + * The may sleep test only makes sense for sync users. + * Async users don't need to sleep here anyway. + */ + crypto_yield(walk->flags); + } if (err) return err; @@ -119,29 +131,54 @@ int crypto_hash_walk_first(struct ahash_request *req, { walk->total = req->nbytes; - if (!walk->total) + if (!walk->total) { + walk->entrylen = 0; return 0; + } walk->alignmask = crypto_ahash_alignmask(crypto_ahash_reqtfm(req)); walk->sg = req->src; - walk->flags = req->base.flags; + walk->flags = req->base.flags & CRYPTO_TFM_REQ_MASK; return hash_walk_new_entry(walk); } EXPORT_SYMBOL_GPL(crypto_hash_walk_first); +int crypto_ahash_walk_first(struct ahash_request *req, + struct crypto_hash_walk *walk) +{ + walk->total = req->nbytes; + + if (!walk->total) { + walk->entrylen = 0; + return 0; + } + + walk->alignmask = crypto_ahash_alignmask(crypto_ahash_reqtfm(req)); + walk->sg = req->src; + walk->flags = req->base.flags & CRYPTO_TFM_REQ_MASK; + walk->flags |= CRYPTO_ALG_ASYNC; + + BUILD_BUG_ON(CRYPTO_TFM_REQ_MASK & CRYPTO_ALG_ASYNC); + + return hash_walk_new_entry(walk); +} +EXPORT_SYMBOL_GPL(crypto_ahash_walk_first); + int crypto_hash_walk_first_compat(struct hash_desc *hdesc, struct crypto_hash_walk *walk, struct scatterlist *sg, unsigned int len) { walk->total = len; - if (!walk->total) + if (!walk->total) { + walk->entrylen = 0; return 0; + } walk->alignmask = crypto_hash_alignmask(hdesc->tfm); walk->sg = sg; - walk->flags = hdesc->flags; + walk->flags = hdesc->flags & CRYPTO_TFM_REQ_MASK; return hash_walk_new_entry(walk); } diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 666f1962a160f5..7e2b5df89df5b6 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -210,7 +210,11 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx, byte_count = DEFAULT_BLK_SZ; } - err = byte_count; + /* + * Return 0 in case of success as mandated by the kernel + * crypto API interface definition. + */ + err = 0; dbgprint(KERN_CRIT "getting %d random bytes for context %p\n", byte_count, ctx); diff --git a/crypto/drbg.c b/crypto/drbg.c index 9999b465d820d4..782ca3e1d310d1 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1289,7 +1289,7 @@ static void drbg_restore_shadow(struct drbg_state *drbg, * as defined in SP800-90A. The additional input is mixed into * the state in addition to the pulled entropy. * - * return: generated number of bytes + * return: 0 when all bytes are generated; < 0 in case of an error */ static int drbg_generate(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, @@ -1430,6 +1430,11 @@ static int drbg_generate(struct drbg_state *drbg, } #endif + /* + * All operations were successful, return 0 as mandated by + * the kernel crypto API interface. + */ + len = 0; err: if (shadow->d_ops->crypto_fini) shadow->d_ops->crypto_fini(shadow); diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c new file mode 100644 index 00000000000000..b39fbd53010209 --- /dev/null +++ b/crypto/mcryptd.c @@ -0,0 +1,705 @@ +/* + * Software multibuffer async crypto daemon. + * + * Copyright (c) 2014 Tim Chen + * + * Adapted from crypto daemon. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MCRYPTD_MAX_CPU_QLEN 100 +#define MCRYPTD_BATCH 9 + +static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, + unsigned int tail); + +struct mcryptd_flush_list { + struct list_head list; + struct mutex lock; +}; + +static struct mcryptd_flush_list __percpu *mcryptd_flist; + +struct hashd_instance_ctx { + struct crypto_shash_spawn spawn; + struct mcryptd_queue *queue; +}; + +static void mcryptd_queue_worker(struct work_struct *work); + +void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay) +{ + struct mcryptd_flush_list *flist; + + if (!cstate->flusher_engaged) { + /* put the flusher on the flush list */ + flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); + mutex_lock(&flist->lock); + list_add_tail(&cstate->flush_list, &flist->list); + cstate->flusher_engaged = true; + cstate->next_flush = jiffies + delay; + queue_delayed_work_on(smp_processor_id(), kcrypto_wq, + &cstate->flush, delay); + mutex_unlock(&flist->lock); + } +} +EXPORT_SYMBOL(mcryptd_arm_flusher); + +static int mcryptd_init_queue(struct mcryptd_queue *queue, + unsigned int max_cpu_qlen) +{ + int cpu; + struct mcryptd_cpu_queue *cpu_queue; + + queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue); + pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue); + if (!queue->cpu_queue) + return -ENOMEM; + for_each_possible_cpu(cpu) { + cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); + pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue); + crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); + INIT_WORK(&cpu_queue->work, mcryptd_queue_worker); + } + return 0; +} + +static void mcryptd_fini_queue(struct mcryptd_queue *queue) +{ + int cpu; + struct mcryptd_cpu_queue *cpu_queue; + + for_each_possible_cpu(cpu) { + cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); + BUG_ON(cpu_queue->queue.qlen); + } + free_percpu(queue->cpu_queue); +} + +static int mcryptd_enqueue_request(struct mcryptd_queue *queue, + struct crypto_async_request *request, + struct mcryptd_hash_request_ctx *rctx) +{ + int cpu, err; + struct mcryptd_cpu_queue *cpu_queue; + + cpu = get_cpu(); + cpu_queue = this_cpu_ptr(queue->cpu_queue); + rctx->tag.cpu = cpu; + + err = crypto_enqueue_request(&cpu_queue->queue, request); + pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n", + cpu, cpu_queue, request); + queue_work_on(cpu, kcrypto_wq, &cpu_queue->work); + put_cpu(); + + return err; +} + +/* + * Try to opportunisticlly flush the partially completed jobs if + * crypto daemon is the only task running. + */ +static void mcryptd_opportunistic_flush(void) +{ + struct mcryptd_flush_list *flist; + struct mcryptd_alg_cstate *cstate; + + flist = per_cpu_ptr(mcryptd_flist, smp_processor_id()); + while (single_task_running()) { + mutex_lock(&flist->lock); + if (list_empty(&flist->list)) { + mutex_unlock(&flist->lock); + return; + } + cstate = list_entry(flist->list.next, + struct mcryptd_alg_cstate, flush_list); + if (!cstate->flusher_engaged) { + mutex_unlock(&flist->lock); + return; + } + list_del(&cstate->flush_list); + cstate->flusher_engaged = false; + mutex_unlock(&flist->lock); + cstate->alg_state->flusher(cstate); + } +} + +/* + * Called in workqueue context, do one real cryption work (via + * req->complete) and reschedule itself if there are more work to + * do. + */ +static void mcryptd_queue_worker(struct work_struct *work) +{ + struct mcryptd_cpu_queue *cpu_queue; + struct crypto_async_request *req, *backlog; + int i; + + /* + * Need to loop through more than once for multi-buffer to + * be effective. + */ + + cpu_queue = container_of(work, struct mcryptd_cpu_queue, work); + i = 0; + while (i < MCRYPTD_BATCH || single_task_running()) { + /* + * preempt_disable/enable is used to prevent + * being preempted by mcryptd_enqueue_request() + */ + local_bh_disable(); + preempt_disable(); + backlog = crypto_get_backlog(&cpu_queue->queue); + req = crypto_dequeue_request(&cpu_queue->queue); + preempt_enable(); + local_bh_enable(); + + if (!req) { + mcryptd_opportunistic_flush(); + return; + } + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + req->complete(req, 0); + if (!cpu_queue->queue.qlen) + return; + ++i; + } + if (cpu_queue->queue.qlen) + queue_work(kcrypto_wq, &cpu_queue->work); +} + +void mcryptd_flusher(struct work_struct *__work) +{ + struct mcryptd_alg_cstate *alg_cpu_state; + struct mcryptd_alg_state *alg_state; + struct mcryptd_flush_list *flist; + int cpu; + + cpu = smp_processor_id(); + alg_cpu_state = container_of(to_delayed_work(__work), + struct mcryptd_alg_cstate, flush); + alg_state = alg_cpu_state->alg_state; + if (alg_cpu_state->cpu != cpu) + pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n", + cpu, alg_cpu_state->cpu); + + if (alg_cpu_state->flusher_engaged) { + flist = per_cpu_ptr(mcryptd_flist, cpu); + mutex_lock(&flist->lock); + list_del(&alg_cpu_state->flush_list); + alg_cpu_state->flusher_engaged = false; + mutex_unlock(&flist->lock); + alg_state->flusher(alg_cpu_state); + } +} +EXPORT_SYMBOL_GPL(mcryptd_flusher); + +static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); + struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst); + + return ictx->queue; +} + +static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, + unsigned int tail) +{ + char *p; + struct crypto_instance *inst; + int err; + + p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + inst = (void *)(p + head); + + err = -ENAMETOOLONG; + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + goto out_free_inst; + + memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); + + inst->alg.cra_priority = alg->cra_priority + 50; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + +out: + return p; + +out_free_inst: + kfree(p); + p = ERR_PTR(err); + goto out; +} + +static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); + struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst); + struct crypto_shash_spawn *spawn = &ictx->spawn; + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_shash *hash; + + hash = crypto_spawn_shash(spawn); + if (IS_ERR(hash)) + return PTR_ERR(hash); + + ctx->child = hash; + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mcryptd_hash_request_ctx) + + crypto_shash_descsize(hash)); + return 0; +} + +static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm) +{ + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_shash(ctx->child); +} + +static int mcryptd_hash_setkey(struct crypto_ahash *parent, + const u8 *key, unsigned int keylen) +{ + struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent); + struct crypto_shash *child = ctx->child; + int err; + + crypto_shash_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_shash_set_flags(child, crypto_ahash_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_shash_setkey(child, key, keylen); + crypto_ahash_set_flags(parent, crypto_shash_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int mcryptd_hash_enqueue(struct ahash_request *req, + crypto_completion_t complete) +{ + int ret; + + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct mcryptd_queue *queue = + mcryptd_get_queue(crypto_ahash_tfm(tfm)); + + rctx->complete = req->base.complete; + req->base.complete = complete; + + ret = mcryptd_enqueue_request(queue, &req->base, rctx); + + return ret; +} + +static void mcryptd_hash_init(struct crypto_async_request *req_async, int err) +{ + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); + struct crypto_shash *child = ctx->child; + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct shash_desc *desc = &rctx->desc; + + if (unlikely(err == -EINPROGRESS)) + goto out; + + desc->tfm = child; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + + err = crypto_shash_init(desc); + + req->base.complete = rctx->complete; + +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_init_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_init); +} + +static void mcryptd_hash_update(struct crypto_async_request *req_async, int err) +{ + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + err = shash_ahash_mcryptd_update(req, &rctx->desc); + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_update_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_update); +} + +static void mcryptd_hash_final(struct crypto_async_request *req_async, int err) +{ + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + err = shash_ahash_mcryptd_final(req, &rctx->desc); + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_final_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_final); +} + +static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err) +{ + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + err = shash_ahash_mcryptd_finup(req, &rctx->desc); + + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_finup_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_finup); +} + +static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err) +{ + struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm); + struct crypto_shash *child = ctx->child; + struct ahash_request *req = ahash_request_cast(req_async); + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct shash_desc *desc = &rctx->desc; + + if (unlikely(err == -EINPROGRESS)) + goto out; + + desc->tfm = child; + desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; /* check this again */ + + err = shash_ahash_mcryptd_digest(req, desc); + + if (err) { + req->base.complete = rctx->complete; + goto out; + } + + return; +out: + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); +} + +static int mcryptd_hash_digest_enqueue(struct ahash_request *req) +{ + return mcryptd_hash_enqueue(req, mcryptd_hash_digest); +} + +static int mcryptd_hash_export(struct ahash_request *req, void *out) +{ + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + return crypto_shash_export(&rctx->desc, out); +} + +static int mcryptd_hash_import(struct ahash_request *req, const void *in) +{ + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + + return crypto_shash_import(&rctx->desc, in); +} + +static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, + struct mcryptd_queue *queue) +{ + struct hashd_instance_ctx *ctx; + struct ahash_instance *inst; + struct shash_alg *salg; + struct crypto_alg *alg; + int err; + + salg = shash_attr_alg(tb[1], 0, 0); + if (IS_ERR(salg)) + return PTR_ERR(salg); + + alg = &salg->base; + pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name); + inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(), + sizeof(*ctx)); + err = PTR_ERR(inst); + if (IS_ERR(inst)) + goto out_put_alg; + + ctx = ahash_instance_ctx(inst); + ctx->queue = queue; + + err = crypto_init_shash_spawn(&ctx->spawn, salg, + ahash_crypto_instance(inst)); + if (err) + goto out_free_inst; + + inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC; + + inst->alg.halg.digestsize = salg->digestsize; + inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx); + + inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm; + inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm; + + inst->alg.init = mcryptd_hash_init_enqueue; + inst->alg.update = mcryptd_hash_update_enqueue; + inst->alg.final = mcryptd_hash_final_enqueue; + inst->alg.finup = mcryptd_hash_finup_enqueue; + inst->alg.export = mcryptd_hash_export; + inst->alg.import = mcryptd_hash_import; + inst->alg.setkey = mcryptd_hash_setkey; + inst->alg.digest = mcryptd_hash_digest_enqueue; + + err = ahash_register_instance(tmpl, inst); + if (err) { + crypto_drop_shash(&ctx->spawn); +out_free_inst: + kfree(inst); + } + +out_put_alg: + crypto_mod_put(alg); + return err; +} + +static struct mcryptd_queue mqueue; + +static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct crypto_attr_type *algt; + + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_DIGEST: + return mcryptd_create_hash(tmpl, tb, &mqueue); + break; + } + + return -EINVAL; +} + +static void mcryptd_free(struct crypto_instance *inst) +{ + struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst); + struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst); + + switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AHASH: + crypto_drop_shash(&hctx->spawn); + kfree(ahash_instance(inst)); + return; + default: + crypto_drop_spawn(&ctx->spawn); + kfree(inst); + } +} + +static struct crypto_template mcryptd_tmpl = { + .name = "mcryptd", + .create = mcryptd_create, + .free = mcryptd_free, + .module = THIS_MODULE, +}; + +struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, + u32 type, u32 mask) +{ + char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct crypto_ahash *tfm; + + if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME, + "mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-EINVAL); + tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { + crypto_free_ahash(tfm); + return ERR_PTR(-EINVAL); + } + + return __mcryptd_ahash_cast(tfm); +} +EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash); + +int shash_ahash_mcryptd_digest(struct ahash_request *req, + struct shash_desc *desc) +{ + int err; + + err = crypto_shash_init(desc) ?: + shash_ahash_mcryptd_finup(req, desc); + + return err; +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_digest); + +int shash_ahash_mcryptd_update(struct ahash_request *req, + struct shash_desc *desc) +{ + struct crypto_shash *tfm = desc->tfm; + struct shash_alg *shash = crypto_shash_alg(tfm); + + /* alignment is to be done by multi-buffer crypto algorithm if needed */ + + return shash->update(desc, NULL, 0); +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_update); + +int shash_ahash_mcryptd_finup(struct ahash_request *req, + struct shash_desc *desc) +{ + struct crypto_shash *tfm = desc->tfm; + struct shash_alg *shash = crypto_shash_alg(tfm); + + /* alignment is to be done by multi-buffer crypto algorithm if needed */ + + return shash->finup(desc, NULL, 0, req->result); +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_finup); + +int shash_ahash_mcryptd_final(struct ahash_request *req, + struct shash_desc *desc) +{ + struct crypto_shash *tfm = desc->tfm; + struct shash_alg *shash = crypto_shash_alg(tfm); + + /* alignment is to be done by multi-buffer crypto algorithm if needed */ + + return shash->final(desc, req->result); +} +EXPORT_SYMBOL_GPL(shash_ahash_mcryptd_final); + +struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm) +{ + struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + return ctx->child; +} +EXPORT_SYMBOL_GPL(mcryptd_ahash_child); + +struct shash_desc *mcryptd_shash_desc(struct ahash_request *req) +{ + struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + return &rctx->desc; +} +EXPORT_SYMBOL_GPL(mcryptd_shash_desc); + +void mcryptd_free_ahash(struct mcryptd_ahash *tfm) +{ + crypto_free_ahash(&tfm->base); +} +EXPORT_SYMBOL_GPL(mcryptd_free_ahash); + + +static int __init mcryptd_init(void) +{ + int err, cpu; + struct mcryptd_flush_list *flist; + + mcryptd_flist = alloc_percpu(struct mcryptd_flush_list); + for_each_possible_cpu(cpu) { + flist = per_cpu_ptr(mcryptd_flist, cpu); + INIT_LIST_HEAD(&flist->list); + mutex_init(&flist->lock); + } + + err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN); + if (err) { + free_percpu(mcryptd_flist); + return err; + } + + err = crypto_register_template(&mcryptd_tmpl); + if (err) { + mcryptd_fini_queue(&mqueue); + free_percpu(mcryptd_flist); + } + + return err; +} + +static void __exit mcryptd_exit(void) +{ + mcryptd_fini_queue(&mqueue); + crypto_unregister_template(&mcryptd_tmpl); + free_percpu(mcryptd_flist); +} + +subsys_initcall(mcryptd_init); +module_exit(mcryptd_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Software async multibuffer crypto daemon"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 055dba992988ab..2fb2f95f3e7289 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1386,11 +1386,11 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, for (j = 0; j < template[i].loops; j++) { err = crypto_rng_get_bytes(tfm, result, template[i].rlen); - if (err != template[i].rlen) { + if (err < 0) { printk(KERN_ERR "alg: cprng: Failed to obtain " "the correct amount of random data for " - "%s (requested %d, got %d)\n", algo, - template[i].rlen, err); + "%s (requested %d)\n", algo, + template[i].rlen); goto out; } } @@ -1673,7 +1673,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, ret = crypto_drbg_get_bytes_addtl(drng, buf, test->expectedlen, &addtl); } - if (ret <= 0) { + if (ret < 0) { printk(KERN_ERR "alg: drbg: could not obtain random data for" "driver %s\n", driver); goto outbuf; @@ -1688,7 +1688,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, ret = crypto_drbg_get_bytes_addtl(drng, buf, test->expectedlen, &addtl); } - if (ret <= 0) { + if (ret < 0) { printk(KERN_ERR "alg: drbg: could not obtain random data for" "driver %s\n", driver); goto outbuf; diff --git a/drivers/char/random.c b/drivers/char/random.c index af60efbca53676..b1f490f5c30bbc 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -273,13 +273,27 @@ /* * Configuration information */ -#define INPUT_POOL_WORDS 128 -#define OUTPUT_POOL_WORDS 32 -#define SEC_XFER_SIZE 512 -#define EXTRACT_SIZE 10 +#define INPUT_POOL_SHIFT 12 +#define INPUT_POOL_WORDS (1 << (INPUT_POOL_SHIFT-5)) +#define OUTPUT_POOL_SHIFT 10 +#define OUTPUT_POOL_WORDS (1 << (OUTPUT_POOL_SHIFT-5)) +#define SEC_XFER_SIZE 512 +#define EXTRACT_SIZE 10 #define LONGS(x) (((x) + sizeof(unsigned long) - 1)/sizeof(unsigned long)) +/* + * To allow fractional bits to be tracked, the following fields contain + * this many fractional bits: + * + * entropy_count, trickle_thresh + * + * 2*(ENTROPY_SHIFT + log2(poolbits)) must <= 31, or the multiply in + * credit_entropy_bits() needs to be 64 bits wide. + */ +#define ENTROPY_SHIFT 3 +#define ENTROPY_BITS(r) ((r)->entropy_count >> ENTROPY_SHIFT) + /* * The minimum number of bits of entropy before we wake up a read on * /dev/random. Should be enough to do a significant reseed. @@ -297,8 +311,7 @@ static int random_write_wakeup_thresh = 128; * When the input pool goes over trickle_thresh, start dropping most * samples to avoid wasting CPU time and reduce lock contention. */ - -static int trickle_thresh __read_mostly = INPUT_POOL_WORDS * 28; +static const int trickle_thresh = (INPUT_POOL_WORDS * 28) << ENTROPY_SHIFT; static DEFINE_PER_CPU(int, trickle_count); @@ -310,46 +323,45 @@ static DEFINE_PER_CPU(int, trickle_count); * scaled squared error sum) except for the last tap, which is 1 to * get the twisting happening as fast as possible. */ + static struct poolinfo { - int poolwords; + int poolbitshift, poolwords, poolbytes, poolbits, poolfracbits; +#define S(x) ilog2(x)+5, (x), (x)*4, (x)*32, (x) << (ENTROPY_SHIFT+5) int tap1, tap2, tap3, tap4, tap5; } poolinfo_table[] = { /* x^128 + x^103 + x^76 + x^51 +x^25 + x + 1 -- 105 */ - { 128, 103, 76, 51, 25, 1 }, + { S(128), 103, 76, 51, 25, 1 }, /* x^32 + x^26 + x^20 + x^14 + x^7 + x + 1 -- 15 */ - { 32, 26, 20, 14, 7, 1 }, + { S(32), 26, 20, 14, 7, 1 }, #if 0 /* x^2048 + x^1638 + x^1231 + x^819 + x^411 + x + 1 -- 115 */ - { 2048, 1638, 1231, 819, 411, 1 }, + { S(2048), 1638, 1231, 819, 411, 1 }, /* x^1024 + x^817 + x^615 + x^412 + x^204 + x + 1 -- 290 */ - { 1024, 817, 615, 412, 204, 1 }, + { S(1024), 817, 615, 412, 204, 1 }, /* x^1024 + x^819 + x^616 + x^410 + x^207 + x^2 + 1 -- 115 */ - { 1024, 819, 616, 410, 207, 2 }, + { S(1024), 819, 616, 410, 207, 2 }, /* x^512 + x^411 + x^308 + x^208 + x^104 + x + 1 -- 225 */ - { 512, 411, 308, 208, 104, 1 }, + { S(512), 411, 308, 208, 104, 1 }, /* x^512 + x^409 + x^307 + x^206 + x^102 + x^2 + 1 -- 95 */ - { 512, 409, 307, 206, 102, 2 }, + { S(512), 409, 307, 206, 102, 2 }, /* x^512 + x^409 + x^309 + x^205 + x^103 + x^2 + 1 -- 95 */ - { 512, 409, 309, 205, 103, 2 }, + { S(512), 409, 309, 205, 103, 2 }, /* x^256 + x^205 + x^155 + x^101 + x^52 + x + 1 -- 125 */ - { 256, 205, 155, 101, 52, 1 }, + { S(256), 205, 155, 101, 52, 1 }, /* x^128 + x^103 + x^78 + x^51 + x^27 + x^2 + 1 -- 70 */ - { 128, 103, 78, 51, 27, 2 }, + { S(128), 103, 78, 51, 27, 2 }, /* x^64 + x^52 + x^39 + x^26 + x^14 + x + 1 -- 15 */ - { 64, 52, 39, 26, 14, 1 }, + { S(64), 52, 39, 26, 14, 1 }, #endif }; -#define POOLBITS poolwords*32 -#define POOLBYTES poolwords*4 - /* * For the purposes of better mixing, we use the CRC-32 polynomial as * well to make a twisted Generalized Feedback Shift Reigster @@ -421,7 +433,7 @@ module_param(debug, bool, 0644); struct entropy_store; struct entropy_store { /* read-only data: */ - struct poolinfo *poolinfo; + const struct poolinfo *poolinfo; __u32 *pool; const char *name; struct entropy_store *pull; @@ -583,11 +595,15 @@ static void fast_mix(struct fast_pool *f, const void *in, int nbytes) } /* - * Credit (or debit) the entropy store with n bits of entropy + * Credit (or debit) the entropy store with n bits of entropy. + * Use credit_entropy_bits_safe() if the value comes from userspace + * or otherwise should be checked for extreme values. */ static void credit_entropy_bits(struct entropy_store *r, int nbits) { int entropy_count, orig; + const int pool_size = r->poolinfo->poolfracbits; + int nfrac = nbits << ENTROPY_SHIFT; if (!nbits) return; @@ -595,13 +611,50 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) DEBUG_ENT("added %d entropy credits to %s\n", nbits, r->name); retry: entropy_count = orig = ACCESS_ONCE(r->entropy_count); - entropy_count += nbits; + if (nfrac < 0) { + /* Debit */ + entropy_count += nfrac; + } else { + /* + * Credit: we have to account for the possibility of + * overwriting already present entropy. Even in the + * ideal case of pure Shannon entropy, new contributions + * approach the full value asymptotically: + * + * entropy <- entropy + (pool_size - entropy) * + * (1 - exp(-add_entropy/pool_size)) + * + * For add_entropy <= pool_size/2 then + * (1 - exp(-add_entropy/pool_size)) >= + * (add_entropy/pool_size)*0.7869... + * so we can approximate the exponential with + * 3/4*add_entropy/pool_size and still be on the + * safe side by adding at most pool_size/2 at a time. + * + * The use of pool_size-2 in the while statement is to + * prevent rounding artifacts from making the loop + * arbitrarily long; this limits the loop to log2(pool_size)*2 + * turns no matter how large nbits is. + */ + int pnfrac = nfrac; + const int s = r->poolinfo->poolbitshift + ENTROPY_SHIFT + 2; + /* The +2 corresponds to the /4 in the denominator */ + + do { + unsigned int anfrac = min(pnfrac, pool_size/2); + unsigned int add = + ((pool_size - entropy_count)*anfrac*3) >> s; + + entropy_count += add; + pnfrac -= anfrac; + } while (unlikely(entropy_count < pool_size-2 && pnfrac)); + } if (entropy_count < 0) { DEBUG_ENT("negative entropy/overflow\n"); entropy_count = 0; - } else if (entropy_count > r->poolinfo->POOLBITS) - entropy_count = r->poolinfo->POOLBITS; + } else if (entropy_count > pool_size) + entropy_count = pool_size; if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) goto retry; @@ -614,16 +667,29 @@ static void credit_entropy_bits(struct entropy_store *r, int nbits) } } - trace_credit_entropy_bits(r->name, nbits, entropy_count, + trace_credit_entropy_bits(r->name, nbits, + entropy_count >> ENTROPY_SHIFT, r->entropy_total, _RET_IP_); /* should we wake readers? */ - if (r == &input_pool && entropy_count >= random_read_wakeup_thresh) { + if (r == &input_pool && + (entropy_count >> ENTROPY_SHIFT) >= random_read_wakeup_thresh) { wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } } +static void credit_entropy_bits_safe(struct entropy_store *r, int nbits) +{ + const int nbits_max = (int)(~0U >> (ENTROPY_SHIFT + 1)); + + /* Cap the value to avoid overflows */ + nbits = min(nbits, nbits_max); + nbits = max(nbits, -nbits_max); + + credit_entropy_bits(r, nbits); +} + /********************************************************************* * * Entropy input management @@ -679,7 +745,7 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) preempt_disable(); /* if over the trickle threshold, use only 1 in 4096 samples */ - if (input_pool.entropy_count > trickle_thresh && + if (ENTROPY_BITS(&input_pool) > trickle_thresh && ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff)) goto out; @@ -819,8 +885,9 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { __u32 tmp[OUTPUT_POOL_WORDS]; - if (r->pull && r->entropy_count < nbytes * 8 && - r->entropy_count < r->poolinfo->POOLBITS) { + if (r->pull && + r->entropy_count < (nbytes << (ENTROPY_SHIFT + 3)) && + r->entropy_count < r->poolinfo->poolfracbits) { /* If we're limited, always leave two wakeup worth's BITS */ int rsvd = r->limit ? 0 : random_read_wakeup_thresh/4; int bytes = nbytes; @@ -832,7 +899,8 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) DEBUG_ENT("going to reseed %s with %d bits " "(%zu of %d requested)\n", - r->name, bytes * 8, nbytes * 8, r->entropy_count); + r->name, bytes * 8, nbytes * 8, + r->entropy_count >> ENTROPY_SHIFT); bytes = extract_entropy(r->pull, tmp, bytes, random_read_wakeup_thresh / 8, rsvd); @@ -858,41 +926,44 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, { unsigned long flags; int wakeup_write = 0; + int have_bytes; + int entropy_count, orig; + size_t ibytes; /* Hold lock while accounting */ spin_lock_irqsave(&r->lock, flags); - BUG_ON(r->entropy_count > r->poolinfo->POOLBITS); + BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); DEBUG_ENT("trying to extract %zu bits from %s\n", nbytes * 8, r->name); /* Can we pull enough? */ - if (r->entropy_count / 8 < min + reserved) { - nbytes = 0; - } else { - int entropy_count, orig; retry: - entropy_count = orig = ACCESS_ONCE(r->entropy_count); + entropy_count = orig = ACCESS_ONCE(r->entropy_count); + have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); + ibytes = nbytes; + if (have_bytes < min + reserved) { + ibytes = 0; + } else { /* If limited, never pull more than available */ - if (r->limit && nbytes + reserved >= entropy_count / 8) - nbytes = entropy_count/8 - reserved; - - if (entropy_count / 8 >= nbytes + reserved) { - entropy_count -= nbytes*8; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) - goto retry; - } else { - entropy_count = reserved; - if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) - goto retry; - } + if (r->limit && ibytes + reserved >= have_bytes) + ibytes = have_bytes - reserved; + + if (have_bytes >= ibytes + reserved) + entropy_count -= ibytes << (ENTROPY_SHIFT + 3); + else + entropy_count = reserved << (ENTROPY_SHIFT + 3); - if (entropy_count < random_write_wakeup_thresh) + if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) + goto retry; + + if ((r->entropy_count >> ENTROPY_SHIFT) + < random_write_wakeup_thresh) wakeup_write = 1; } DEBUG_ENT("debiting %zu entropy credits from %s%s\n", - nbytes * 8, r->name, r->limit ? "" : " (unlimited)"); + ibytes * 8, r->name, r->limit ? "" : " (unlimited)"); spin_unlock_irqrestore(&r->lock, flags); @@ -901,7 +972,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, kill_fasync(&fasync, SIGIO, POLL_OUT); } - return nbytes; + return ibytes; } static void extract_buf(struct entropy_store *r, __u8 *out) @@ -979,7 +1050,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, r->last_data_init = true; spin_unlock_irqrestore(&r->lock, flags); trace_extract_entropy(r->name, EXTRACT_SIZE, - r->entropy_count, _RET_IP_); + ENTROPY_BITS(r), _RET_IP_); xfer_secondary_pool(r, EXTRACT_SIZE); extract_buf(r, tmp); spin_lock_irqsave(&r->lock, flags); @@ -988,7 +1059,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, spin_unlock_irqrestore(&r->lock, flags); } - trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_); + trace_extract_entropy(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); @@ -1021,7 +1092,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf, ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; - trace_extract_entropy_user(r->name, nbytes, r->entropy_count, _RET_IP_); + trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_); xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, 0, 0); @@ -1117,7 +1188,7 @@ static void init_std_data(struct entropy_store *r) r->entropy_total = 0; r->last_data_init = false; mix_pool_bytes(r, &now, sizeof(now), NULL); - for (i = r->poolinfo->POOLBYTES; i > 0; i -= sizeof(rv)) { + for (i = r->poolinfo->poolbytes; i > 0; i -= sizeof(rv)) { if (!arch_get_random_long(&rv)) break; mix_pool_bytes(r, &rv, sizeof(rv), NULL); @@ -1193,8 +1264,8 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) DEBUG_ENT("sleeping?\n"); wait_event_interruptible(random_read_wait, - input_pool.entropy_count >= - random_read_wakeup_thresh); + ENTROPY_BITS(&input_pool) >= + random_read_wakeup_thresh); DEBUG_ENT("awake\n"); @@ -1230,9 +1301,9 @@ random_poll(struct file *file, poll_table * wait) poll_wait(file, &random_read_wait, wait); poll_wait(file, &random_write_wait, wait); mask = 0; - if (input_pool.entropy_count >= random_read_wakeup_thresh) + if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_thresh) mask |= POLLIN | POLLRDNORM; - if (input_pool.entropy_count < random_write_wakeup_thresh) + if (ENTROPY_BITS(&input_pool) < random_write_wakeup_thresh) mask |= POLLOUT | POLLWRNORM; return mask; } @@ -1283,7 +1354,8 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) switch (cmd) { case RNDGETENTCNT: /* inherently racy, no point locking */ - if (put_user(input_pool.entropy_count, p)) + ent_count = ENTROPY_BITS(&input_pool); + if (put_user(ent_count, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -1291,7 +1363,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (get_user(ent_count, p)) return -EFAULT; - credit_entropy_bits(&input_pool, ent_count); + credit_entropy_bits_safe(&input_pool, ent_count); return 0; case RNDADDENTROPY: if (!capable(CAP_SYS_ADMIN)) @@ -1306,7 +1378,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) size); if (retval < 0) return retval; - credit_entropy_bits(&input_pool, ent_count); + credit_entropy_bits_safe(&input_pool, ent_count); return 0; case RNDZAPENTCNT: case RNDCLEARPOOL: @@ -1413,6 +1485,23 @@ static int proc_do_uuid(ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +/* + * Return entropy available scaled to integral bits + */ +static int proc_do_entropy(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + ctl_table fake_table; + int entropy_count; + + entropy_count = *(int *)table->data >> ENTROPY_SHIFT; + + fake_table.data = &entropy_count; + fake_table.maxlen = sizeof(entropy_count); + + return proc_dointvec(&fake_table, write, buffer, lenp, ppos); +} + static int sysctl_poolsize = INPUT_POOL_WORDS * 32; extern ctl_table random_table[]; ctl_table random_table[] = { @@ -1427,7 +1516,7 @@ ctl_table random_table[] = { .procname = "entropy_avail", .maxlen = sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_do_entropy, .data = &input_pool.entropy_count, }, { diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 010d814dd9f56b..538856f3e68aba 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -148,7 +148,8 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) crq.len = (u16)count; crq.data = ibmvtpm->rtce_dma_handle; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, word[0], word[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(word[0]), + cpu_to_be64(word[1])); if (rc != H_SUCCESS) { dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc); rc = 0; @@ -186,7 +187,8 @@ static int ibmvtpm_crq_get_rtce_size(struct ibmvtpm_dev *ibmvtpm) crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_GET_RTCE_BUFFER_SIZE; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, buf[0], buf[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]), + cpu_to_be64(buf[1])); if (rc != H_SUCCESS) dev_err(ibmvtpm->dev, "ibmvtpm_crq_get_rtce_size failed rc=%d\n", rc); @@ -212,7 +214,8 @@ static int ibmvtpm_crq_get_version(struct ibmvtpm_dev *ibmvtpm) crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_GET_VERSION; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, buf[0], buf[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]), + cpu_to_be64(buf[1])); if (rc != H_SUCCESS) dev_err(ibmvtpm->dev, "ibmvtpm_crq_get_version failed rc=%d\n", rc); @@ -335,7 +338,8 @@ static int tpm_ibmvtpm_suspend(struct device *dev) crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_PREPARE_TO_SUSPEND; - rc = ibmvtpm_send_crq(ibmvtpm->vdev, buf[0], buf[1]); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(buf[0]), + cpu_to_be64(buf[1])); if (rc != H_SUCCESS) dev_err(ibmvtpm->dev, "tpm_ibmvtpm_suspend failed rc=%d\n", rc); @@ -519,11 +523,11 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq, case IBMVTPM_VALID_CMD: switch (crq->msg) { case VTPM_GET_RTCE_BUFFER_SIZE_RES: - if (crq->len <= 0) { + if (be16_to_cpu(crq->len) <= 0) { dev_err(ibmvtpm->dev, "Invalid rtce size\n"); return; } - ibmvtpm->rtce_size = crq->len; + ibmvtpm->rtce_size = be16_to_cpu(crq->len); ibmvtpm->rtce_buf = kmalloc(ibmvtpm->rtce_size, GFP_KERNEL); if (!ibmvtpm->rtce_buf) { @@ -544,11 +548,11 @@ static void ibmvtpm_crq_process(struct ibmvtpm_crq *crq, return; case VTPM_GET_VERSION_RES: - ibmvtpm->vtpm_version = crq->data; + ibmvtpm->vtpm_version = be32_to_cpu(crq->data); return; case VTPM_TPM_COMMAND_RES: /* len of the data in rtce buffer */ - ibmvtpm->res_len = crq->len; + ibmvtpm->res_len = be16_to_cpu(crq->len); wake_up_interruptible(&ibmvtpm->wq); return; default: diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index be3203d61c46dc..f0bc75387cdfcc 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -155,6 +155,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) if (event == BUS_NOTIFY_ADD_DEVICE) { for (tmp = dev; tmp; tmp = tmp->bus->self) { level--; + info->path[level].bus = tmp->bus->number; info->path[level].device = PCI_SLOT(tmp->devfn); info->path[level].function = PCI_FUNC(tmp->devfn); if (pci_is_root_bus(tmp->bus)) @@ -177,17 +178,33 @@ static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, int i; if (info->bus != bus) - return false; + goto fallback; if (info->level != count) - return false; + goto fallback; for (i = 0; i < count; i++) { if (path[i].device != info->path[i].device || path[i].function != info->path[i].function) - return false; + goto fallback; } return true; + +fallback: + + if (count != 1) + return false; + + i = info->level - 1; + if (bus == info->path[i].bus && + path[0].device == info->path[i].device && + path[0].function == info->path[i].function) { + pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n", + bus, path[0].device, path[0].function); + return true; + } + + return false; } /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 2ddc399b76aab4..0d67f4337af106 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -793,6 +793,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) * For timestamping add flag to skb_shinfo and * set flag for further reference */ + tx_info->ts_requested = 0; if (ring->hwtstamp_tx_type == HWTSTAMP_TX_ON && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 3fc3f06e536420..1d24d380c8edde 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -42,9 +42,7 @@ static struct team_port *team_port_get_rcu(const struct net_device *dev) { - struct team_port *port = rcu_dereference(dev->rx_handler_data); - - return team_port_exists(dev) ? port : NULL; + return rcu_dereference(dev->rx_handler_data); } static struct team_port *team_port_get_rtnl(const struct net_device *dev) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 8c2beb9afaffba..bc9e187cf1a5ec 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -85,6 +85,11 @@ static int ap_thread_flag = 0; module_param_named(poll_thread, ap_thread_flag, int, 0000); MODULE_PARM_DESC(poll_thread, "Turn on/off poll thread, default is 0 (off)."); +int ap_hwrng_seed = 1; +EXPORT_SYMBOL(ap_hwrng_seed); +module_param_named(hwrng_seed, ap_hwrng_seed, int, S_IRUSR|S_IRGRP); +MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on)."); + static struct device *ap_root_device = NULL; static struct ap_config_info *ap_configuration; static DEFINE_SPINLOCK(ap_device_list_lock); @@ -913,10 +918,15 @@ static int ap_device_probe(struct device *dev) int rc; ap_dev->drv = ap_drv; + + spin_lock_bh(&ap_device_list_lock); + list_add(&ap_dev->list, &ap_device_list); + spin_unlock_bh(&ap_device_list_lock); + rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV; - if (!rc) { + if (rc) { spin_lock_bh(&ap_device_list_lock); - list_add(&ap_dev->list, &ap_device_list); + list_del_init(&ap_dev->list); spin_unlock_bh(&ap_device_list_lock); } return rc; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 055a0f956d1718..63888bc8ce2e07 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -38,6 +38,7 @@ #define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ extern int ap_domain_index; +extern int ap_hwrng_seed; /** * The ap_qid_t identifier of an ap queue. It contains a diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 4b824b15194f6e..6927e764ad8dc1 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1374,6 +1374,7 @@ static int zcrypt_rng_data_read(struct hwrng *rng, u32 *data) static struct hwrng zcrypt_rng_dev = { .name = "zcrypt", .data_read = zcrypt_rng_data_read, + .quality = 990, }; static int zcrypt_rng_device_add(void) @@ -1388,6 +1389,8 @@ static int zcrypt_rng_device_add(void) goto out; } zcrypt_rng_buffer_index = 0; + if (!ap_hwrng_seed) + zcrypt_rng_dev.quality = 0; rc = hwrng_register(&zcrypt_rng_dev); if (rc) goto out_free; diff --git a/fs/exec.c b/fs/exec.c index d5973c8138687c..c7bcfee62aa5af 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -748,18 +748,17 @@ EXPORT_SYMBOL(setup_arg_pages); #endif /* CONFIG_MMU */ -struct file *open_exec(const char *name) +static struct file *do_open_exec(struct filename *name) { struct file *file; int err; - struct filename tmp = { .name = name }; static const struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC | MAY_OPEN, .intent = LOOKUP_OPEN }; - file = do_filp_open(AT_FDCWD, &tmp, &open_exec_flags, LOOKUP_FOLLOW); + file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); if (IS_ERR(file)) goto out; @@ -783,6 +782,18 @@ struct file *open_exec(const char *name) fput(file); return ERR_PTR(err); } + +struct file *open_exec(const char *name) +{ + struct filename *filename = getname_kernel(name); + struct file *f = ERR_CAST(filename); + + if (!IS_ERR(filename)) { + f = do_open_exec(filename); + putname(filename); + } + return f; +} EXPORT_SYMBOL(open_exec); int kernel_read(struct file *file, loff_t offset, @@ -1164,7 +1175,7 @@ int prepare_bprm_creds(struct linux_binprm *bprm) return -ENOMEM; } -void free_bprm(struct linux_binprm *bprm) +static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { @@ -1451,7 +1462,7 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -static int do_execve_common(const char *filename, +static int do_execve_common(struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp) { @@ -1462,6 +1473,9 @@ static int do_execve_common(const char *filename, int retval; const struct cred *cred = current_cred(); + if (IS_ERR(filename)) + return PTR_ERR(filename); + /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs @@ -1497,7 +1511,7 @@ static int do_execve_common(const char *filename, clear_in_exec = retval; current->in_execve = 1; - file = open_exec(filename); + file = do_open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1505,8 +1519,7 @@ static int do_execve_common(const char *filename, sched_exec(); bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; + bprm->filename = bprm->interp = filename->name; retval = bprm_mm_init(bprm); if (retval) @@ -1547,6 +1560,7 @@ static int do_execve_common(const char *filename, acct_update_integrals(current); task_numa_free(current); free_bprm(bprm); + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1575,10 +1589,11 @@ static int do_execve_common(const char *filename, if (displaced) reset_files_struct(displaced); out_ret: + putname(filename); return retval; } -int do_execve(const char *filename, +int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { @@ -1588,7 +1603,7 @@ int do_execve(const char *filename, } #ifdef CONFIG_COMPAT -static int compat_do_execve(const char *filename, +static int compat_do_execve(struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp) { @@ -1683,25 +1698,13 @@ SYSCALL_DEFINE3(execve, const char __user *const __user *, argv, const char __user *const __user *, envp) { - struct filename *path = getname(filename); - int error = PTR_ERR(path); - if (!IS_ERR(path)) { - error = do_execve(path->name, argv, envp); - putname(path); - } - return error; + return do_execve(getname(filename), argv, envp); } #ifdef CONFIG_COMPAT asmlinkage long compat_sys_execve(const char __user * filename, const compat_uptr_t __user * argv, const compat_uptr_t __user * envp) { - struct filename *path = getname(filename); - int error = PTR_ERR(path); - if (!IS_ERR(path)) { - error = compat_do_execve(path->name, argv, envp); - putname(path); - } - return error; + return compat_do_execve(getname(filename), argv, envp); } #endif diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6afdcc9b9f9c1c..f64e45bf01b302 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -920,6 +920,22 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, return error; } +static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags) +{ + int error; + struct gfs2_inode *ip = GFS2_I(out->f_mapping->host); + + error = gfs2_rs_alloc(ip); + if (error) + return (ssize_t)error; + + gfs2_size_hint(out, *ppos, len); + + return generic_file_splice_write(pipe, out, ppos, len, flags); +} + #ifdef CONFIG_GFS2_FS_LOCKING_DLM /** @@ -1086,7 +1102,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = gfs2_setlease, .fallocate = gfs2_fallocate, }; @@ -1116,7 +1132,7 @@ const struct file_operations gfs2_file_fops_nolock = { .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = gfs2_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/namei.c b/fs/namei.c index cb238b752718d0..fb997215ed497c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -117,15 +117,6 @@ * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ -void final_putname(struct filename *name) -{ - if (name->separate) { - __putname(name->name); - kfree(name); - } else { - __putname(name); - } -} #define EMBEDDED_NAME_MAX (PATH_MAX - sizeof(struct filename)) @@ -144,6 +135,7 @@ getname_flags(const char __user *filename, int flags, int *empty) result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); + result->refcnt = 1; /* * First, try to embed the struct filename inside the names_cache @@ -178,6 +170,7 @@ getname_flags(const char __user *filename, int flags, int *empty) } result->name = kname; result->separate = true; + result->refcnt = 1; max = PATH_MAX; goto recopy; } @@ -196,11 +189,12 @@ getname_flags(const char __user *filename, int flags, int *empty) goto error; result->uptr = filename; + result->aname = NULL; audit_getname(result); return result; error: - final_putname(result); + putname(result); return err; } @@ -210,14 +204,56 @@ getname(const char __user * filename) return getname_flags(filename, 0, NULL); } -#ifdef CONFIG_AUDITSYSCALL +struct filename * +getname_kernel(const char * filename) +{ + struct filename *result; + int len = strlen(filename) + 1; + + result = __getname(); + if (unlikely(!result)) + return ERR_PTR(-ENOMEM); + + if (len <= EMBEDDED_NAME_MAX) { + result->name = (char *)(result) + sizeof(*result); + result->separate = false; + } else if (len <= PATH_MAX) { + struct filename *tmp; + + tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + if (unlikely(!tmp)) { + __putname(result); + return ERR_PTR(-ENOMEM); + } + tmp->name = (char *)result; + tmp->separate = true; + result = tmp; + } else { + __putname(result); + return ERR_PTR(-ENAMETOOLONG); + } + memcpy((char *)result->name, filename, len); + result->uptr = NULL; + result->aname = NULL; + result->refcnt = 1; + audit_getname(result); + + return result; +} + void putname(struct filename *name) { - if (unlikely(!audit_dummy_context())) - return audit_putname(name); - final_putname(name); + BUG_ON(name->refcnt <= 0); + + if (--name->refcnt > 0) + return; + + if (name->separate) { + __putname(name->name); + kfree(name); + } else + __putname(name); } -#endif static int check_acl(struct inode *inode, int mask) { @@ -2005,31 +2041,47 @@ static int filename_lookup(int dfd, struct filename *name, static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) { - struct filename filename = { .name = name }; + struct filename *filename = getname_kernel(name); + int retval = PTR_ERR(filename); - return filename_lookup(dfd, &filename, flags, nd); + if (!IS_ERR(filename)) { + retval = filename_lookup(dfd, filename, flags, nd); + putname(filename); + } + return retval; } /* does lookup, returns the object with parent locked */ struct dentry *kern_path_locked(const char *name, struct path *path) { + struct filename *filename = getname_kernel(name); struct nameidata nd; struct dentry *d; - int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd); - if (err) - return ERR_PTR(err); + int err; + + if (IS_ERR(filename)) + return ERR_CAST(filename); + + err = filename_lookup(AT_FDCWD, filename, LOOKUP_PARENT, &nd); + if (err) { + d = ERR_PTR(err); + goto out; + } if (nd.last_type != LAST_NORM) { path_put(&nd.path); - return ERR_PTR(-EINVAL); + d = ERR_PTR(-EINVAL); + goto out; } mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); d = __lookup_hash(&nd.last, nd.path.dentry, 0); if (IS_ERR(d)) { mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); - return d; + goto out; } *path = nd.path; +out: + putname(filename); return d; } @@ -2326,13 +2378,17 @@ static int filename_mountpoint(int dfd, struct filename *s, struct path *path, unsigned int flags) { - int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); + int error; + if (IS_ERR(s)) + return PTR_ERR(s); + error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU); if (unlikely(error == -ECHILD)) error = path_mountpoint(dfd, s->name, path, flags); if (unlikely(error == -ESTALE)) error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL); if (likely(!error)) audit_inode(s, path->dentry, 0); + putname(s); return error; } @@ -2354,21 +2410,14 @@ int user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags, struct path *path) { - struct filename *s = getname(name); - int error; - if (IS_ERR(s)) - return PTR_ERR(s); - error = filename_mountpoint(dfd, s, path, flags); - putname(s); - return error; + return filename_mountpoint(dfd, getname(name), path, flags); } int kern_path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags) { - struct filename s = {.name = name}; - return filename_mountpoint(dfd, &s, path, flags); + return filename_mountpoint(dfd, getname_kernel(name), path, flags); } EXPORT_SYMBOL(kern_path_mountpoint); @@ -3204,7 +3253,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, { struct nameidata nd; struct file *file; - struct filename filename = { .name = name }; + struct filename *filename; nd.root.mnt = mnt; nd.root.dentry = dentry; @@ -3214,15 +3263,20 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); - file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU); + filename = getname_kernel(name); + if (unlikely(IS_ERR(filename))) + return ERR_CAST(filename); + + file = path_openat(-1, filename, &nd, op, flags | LOOKUP_RCU); if (unlikely(file == ERR_PTR(-ECHILD))) - file = path_openat(-1, &filename, &nd, op, flags); + file = path_openat(-1, filename, &nd, op, flags); if (unlikely(file == ERR_PTR(-ESTALE))) - file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_REVAL); + file = path_openat(-1, filename, &nd, op, flags | LOOKUP_REVAL); + putname(filename); return file; } -struct dentry *kern_path_create(int dfd, const char *pathname, +static struct dentry *filename_create(int dfd, struct filename *name, struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); @@ -3237,7 +3291,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, */ lookup_flags &= LOOKUP_REVAL; - error = do_path_lookup(dfd, pathname, LOOKUP_PARENT|lookup_flags, &nd); + error = filename_lookup(dfd, name, LOOKUP_PARENT|lookup_flags, &nd); if (error) return ERR_PTR(error); @@ -3291,6 +3345,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, path_put(&nd.path); return dentry; } + +struct dentry *kern_path_create(int dfd, const char *pathname, + struct path *path, unsigned int lookup_flags) +{ + struct filename *filename = getname_kernel(pathname); + struct dentry *res; + + if (IS_ERR(filename)) + return ERR_CAST(filename); + res = filename_create(dfd, filename, path, lookup_flags); + putname(filename); + return res; +} EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) @@ -3309,7 +3376,7 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, struct dentry *res; if (IS_ERR(tmp)) return ERR_CAST(tmp); - res = kern_path_create(dfd, tmp->name, path, lookup_flags); + res = filename_create(dfd, tmp, path, lookup_flags); putname(tmp); return res; } diff --git a/fs/open.c b/fs/open.c index 5cf4cfd348d4c6..7fbf3330a7053a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -979,8 +979,14 @@ struct file *file_open_name(struct filename *name, int flags, umode_t mode) */ struct file *filp_open(const char *filename, int flags, umode_t mode) { - struct filename name = {.name = filename}; - return file_open_name(&name, flags, mode); + struct filename *name = getname_kernel(filename); + struct file *file = ERR_CAST(name); + + if (!IS_ERR(name)) { + file = file_open_name(name, flags, mode); + putname(name); + } + return file; } EXPORT_SYMBOL(filp_open); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 821eae8cbd8cf1..3b4af1d7c7e91c 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -55,15 +55,28 @@ extern const struct crypto_type crypto_ahash_type; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk); +int crypto_ahash_walk_first(struct ahash_request *req, + struct crypto_hash_walk *walk); int crypto_hash_walk_first_compat(struct hash_desc *hdesc, struct crypto_hash_walk *walk, struct scatterlist *sg, unsigned int len); +static inline int crypto_ahash_walk_done(struct crypto_hash_walk *walk, + int err) +{ + return crypto_hash_walk_done(walk, err); +} + static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) { return !(walk->entrylen | walk->total); } +static inline int crypto_ahash_walk_last(struct crypto_hash_walk *walk) +{ + return crypto_hash_walk_last(walk); +} + int crypto_register_ahash(struct ahash_alg *alg); int crypto_unregister_ahash(struct ahash_alg *alg); int ahash_register_instance(struct crypto_template *tmpl, @@ -104,6 +117,15 @@ int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); +int shash_ahash_mcryptd_update(struct ahash_request *req, + struct shash_desc *desc); +int shash_ahash_mcryptd_final(struct ahash_request *req, + struct shash_desc *desc); +int shash_ahash_mcryptd_finup(struct ahash_request *req, + struct shash_desc *desc); +int shash_ahash_mcryptd_digest(struct ahash_request *req, + struct shash_desc *desc); + int crypto_init_shash_ops_async(struct crypto_tfm *tfm); static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) diff --git a/include/crypto/mcryptd.h b/include/crypto/mcryptd.h new file mode 100644 index 00000000000000..c23ee1f7ee8062 --- /dev/null +++ b/include/crypto/mcryptd.h @@ -0,0 +1,112 @@ +/* + * Software async multibuffer crypto daemon headers + * + * Author: + * Tim Chen + * + * Copyright (c) 2014, Intel Corporation. + */ + +#ifndef _CRYPTO_MCRYPT_H +#define _CRYPTO_MCRYPT_H + +#include +#include +#include + +struct mcryptd_ahash { + struct crypto_ahash base; +}; + +static inline struct mcryptd_ahash *__mcryptd_ahash_cast( + struct crypto_ahash *tfm) +{ + return (struct mcryptd_ahash *)tfm; +} + +struct mcryptd_cpu_queue { + struct crypto_queue queue; + struct work_struct work; +}; + +struct mcryptd_queue { + struct mcryptd_cpu_queue __percpu *cpu_queue; +}; + +struct mcryptd_instance_ctx { + struct crypto_spawn spawn; + struct mcryptd_queue *queue; +}; + +struct mcryptd_hash_ctx { + struct crypto_shash *child; + struct mcryptd_alg_state *alg_state; +}; + +struct mcryptd_tag { + /* seq number of request */ + unsigned seq_num; + /* arrival time of request */ + unsigned long arrival; + unsigned long expire; + int cpu; +}; + +struct mcryptd_hash_request_ctx { + struct list_head waiter; + crypto_completion_t complete; + struct mcryptd_tag tag; + struct crypto_hash_walk walk; + u8 *out; + int flag; + struct shash_desc desc; +}; + +struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name, + u32 type, u32 mask); +struct crypto_shash *mcryptd_ahash_child(struct mcryptd_ahash *tfm); +struct shash_desc *mcryptd_shash_desc(struct ahash_request *req); +void mcryptd_free_ahash(struct mcryptd_ahash *tfm); +void mcryptd_flusher(struct work_struct *work); + +enum mcryptd_req_type { + MCRYPTD_NONE, + MCRYPTD_UPDATE, + MCRYPTD_FINUP, + MCRYPTD_DIGEST, + MCRYPTD_FINAL +}; + +struct mcryptd_alg_cstate { + unsigned long next_flush; + unsigned next_seq_num; + bool flusher_engaged; + struct delayed_work flush; + int cpu; + struct mcryptd_alg_state *alg_state; + void *mgr; + spinlock_t work_lock; + struct list_head work_list; + struct list_head flush_list; +}; + +struct mcryptd_alg_state { + struct mcryptd_alg_cstate __percpu *alg_cstate; + unsigned long (*flusher)(struct mcryptd_alg_cstate *cstate); +}; + +/* return delay in jiffies from current time */ +static inline unsigned long get_delay(unsigned long t) +{ + long delay; + + delay = (long) t - (long) jiffies; + if (delay <= 0) + return 0; + else + return (unsigned long) delay; +} + +void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay); + +#endif diff --git a/include/linux/audit.h b/include/linux/audit.h index cb5bee871c37f3..61f797b4f7bd3a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -46,6 +46,7 @@ struct audit_tree; struct audit_krule { int vers_ops; + u32 pflags; u32 flags; u32 listnr; u32 action; @@ -63,6 +64,9 @@ struct audit_krule { u64 prio; }; +/* Flag to indicate legacy AUDIT_LOGINUID unset usage */ +#define AUDIT_LOGINUID_LEGACY 0x1 + struct audit_field { u32 type; u32 val; @@ -105,7 +109,6 @@ extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); -extern void audit_putname(struct filename *name); #define AUDIT_INODE_PARENT 1 /* dentry represents the parent */ #define AUDIT_INODE_HIDDEN 2 /* audit record should be hidden */ @@ -324,8 +327,6 @@ static inline struct filename *audit_reusename(const __user char *name) } static inline void audit_getname(struct filename *name) { } -static inline void audit_putname(struct filename *name) -{ } static inline void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index df97ca4aae5200..7986b173d91260 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -114,7 +114,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv, extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); -extern void free_bprm(struct linux_binprm *); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 23c8db129560c6..f7bb3f62c09fba 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -56,13 +56,19 @@ struct dmar_drhd_unit { struct intel_iommu *iommu; }; +struct dmar_pci_path { + u8 bus; + u8 device; + u8 function; +}; + struct dmar_pci_notify_info { struct pci_dev *dev; unsigned long event; int bus; u16 seg; u16 level; - struct acpi_dmar_pci_path path[]; + struct dmar_pci_path path[]; } __attribute__((packed)); extern struct rw_semaphore dmar_global_lock; diff --git a/include/linux/fs.h b/include/linux/fs.h index 664b4d16481878..ebc5aaa646d433 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2169,6 +2169,9 @@ struct filename { const __user char *uptr; /* original userland pointer */ struct audit_names *aname; bool separate; /* should "name" be freed? */ +#ifndef __GENKSYMS__ + int refcnt; +#endif }; extern long vfs_truncate(struct path *, loff_t); @@ -2187,6 +2190,8 @@ extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname(const char __user *); +extern struct filename *getname_kernel(const char *); +extern void putname(struct filename *name); enum { FILE_CREATED = 1, @@ -2207,15 +2212,8 @@ extern void __init vfs_caches_init(unsigned long); extern struct kmem_cache *names_cachep; -extern void final_putname(struct filename *name); - #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) -#ifndef CONFIG_AUDITSYSCALL -#define putname(name) final_putname(name) -#else -extern void putname(struct filename *name); -#endif #ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a78680a92dba3a..661c0aeef1c4bc 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -101,8 +101,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, new_dir_mask |= FS_ISDIR; } - fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie); - fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie); + fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name, + fs_cookie); + fsnotify(new_dir, new_dir_mask, source, FSNOTIFY_EVENT_INODE, new_name, + fs_cookie); if (target) fsnotify_link_count(target); diff --git a/include/linux/sched.h b/include/linux/sched.h index c322878af9e690..d49ce8e764e98f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -62,6 +62,7 @@ struct bio_list; struct fs_struct; struct perf_event_context; struct blk_plug; +struct filename; /* * List of flags we want to share for kernel threads, @@ -99,6 +100,7 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); +extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern unsigned long this_cpu_load(void); @@ -2329,7 +2331,7 @@ extern void do_group_exit(int); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(const char *, +extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); diff --git a/init/main.c b/init/main.c index e74b65f43296b8..8678d709aacc1b 100644 --- a/init/main.c +++ b/init/main.c @@ -873,7 +873,7 @@ void __init load_default_modules(void) static int run_init_process(const char *init_filename) { argv_init[0] = init_filename; - return do_execve(init_filename, + return do_execve(getname_kernel(init_filename), (const char __user *const __user *)argv_init, (const char __user *const __user *)envp_init); } diff --git a/kernel/audit.h b/kernel/audit.h index ab586ca12b4ab5..e2a59f3fdf87d7 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -24,11 +24,9 @@ #include #include -/* 0 = no checking - 1 = put_count checking - 2 = verbose put_count checking -*/ +#ifdef __GENKSYMS__ #define AUDIT_DEBUG 0 +#endif /* AUDIT_NAMES is the number of slots we reserve in the audit_context * for saving names from getname(). If we get more names we will allocate @@ -74,9 +72,8 @@ struct audit_cap_data { }; }; -/* When fs/namei.c:getname() is called, we store the pointer in name and - * we don't let putname() free it (instead we free all of the saved - * pointers at syscall exit time). +/* When fs/namei.c:getname() is called, we store the pointer in name and bump + * the refcnt in the associated filename struct. * * Further, in fs/namei.c:path_lookup() we store the inode and device. */ @@ -85,7 +82,9 @@ struct audit_names { struct filename *name; int name_len; /* number of chars to log */ +#ifdef __GENKSYMS__ bool name_put; /* call __putname()? */ +#endif unsigned long ino; dev_t dev; @@ -104,6 +103,13 @@ struct audit_names { * should be freed on syscall exit. */ bool should_free; + +#ifdef __GENKSYMS__ +#if AUDIT_DEBUG + int put_count; + int ino_count; +#endif +#endif }; /* The per-task audit context. */ @@ -202,11 +208,6 @@ struct audit_context { } execve; }; int fds[2]; - -#if AUDIT_DEBUG - int put_count; - int ino_count; -#endif }; extern int audit_ever_enabled; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 42c84cc258105d..71fbdd96c0371f 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -427,6 +427,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; f->val = 0; + entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } err = audit_field_valid(entry, f); @@ -602,6 +603,13 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->buflen += data->values[i] = audit_pack_string(&bufp, krule->filterkey); break; + case AUDIT_LOGINUID_SET: + if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) { + data->fields[i] = AUDIT_LOGINUID; + data->values[i] = AUDIT_UID_UNSET; + break; + } + /* fallthrough if set */ default: data->values[i] = f->val; } @@ -618,6 +626,7 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) int i; if (a->flags != b->flags || + a->pflags != b->pflags || a->listnr != b->listnr || a->action != b->action || a->field_count != b->field_count) @@ -736,6 +745,7 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old) new = &entry->rule; new->vers_ops = old->vers_ops; new->flags = old->flags; + new->pflags = old->pflags; new->listnr = old->listnr; new->action = old->action; for (i = 0; i < AUDIT_BITMASK_SIZE; i++) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 01f5de1b9bd414..92a3d4c32993ba 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -858,34 +858,10 @@ static inline void audit_free_names(struct audit_context *context) { struct audit_names *n, *next; -#if AUDIT_DEBUG == 2 - if (context->put_count + context->ino_count != context->name_count) { - int i = 0; - - printk(KERN_ERR "%s:%d(:%d): major=%d in_syscall=%d" - " name_count=%d put_count=%d" - " ino_count=%d [NOT freeing]\n", - __FILE__, __LINE__, - context->serial, context->major, context->in_syscall, - context->name_count, context->put_count, - context->ino_count); - list_for_each_entry(n, &context->names_list, list) { - printk(KERN_ERR "names[%d] = %p = %s\n", i++, - n->name, n->name->name ?: "(null)"); - } - dump_stack(); - return; - } -#endif -#if AUDIT_DEBUG - context->put_count = 0; - context->ino_count = 0; -#endif - list_for_each_entry_safe(n, next, &context->names_list, list) { list_del(&n->list); - if (n->name && n->name_put) - final_putname(n->name); + if (n->name) + putname(n->name); if (n->should_free) kfree(n); } @@ -1649,9 +1625,6 @@ static struct audit_names *audit_alloc_name(struct audit_context *context, list_add_tail(&aname->list, &context->names_list); context->name_count++; -#if AUDIT_DEBUG - context->ino_count++; -#endif return aname; } @@ -1672,8 +1645,10 @@ __audit_reusename(const __user char *uptr) list_for_each_entry(n, &context->names_list, list) { if (!n->name) continue; - if (n->name->uptr == uptr) + if (n->name->uptr == uptr) { + n->name->refcnt++; return n->name; + } } return NULL; } @@ -1690,19 +1665,8 @@ void __audit_getname(struct filename *name) struct audit_context *context = current->audit_context; struct audit_names *n; - if (!context->in_syscall) { -#if AUDIT_DEBUG == 2 - printk(KERN_ERR "%s:%d(:%d): ignoring getname(%p)\n", - __FILE__, __LINE__, context->serial, name); - dump_stack(); -#endif + if (!context->in_syscall) return; - } - -#if AUDIT_DEBUG - /* The filename _must_ have a populated ->name */ - BUG_ON(!name->name); -#endif n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN); if (!n) @@ -1710,57 +1674,13 @@ void __audit_getname(struct filename *name) n->name = name; n->name_len = AUDIT_NAME_FULL; - n->name_put = true; name->aname = n; + name->refcnt++; if (!context->pwd.dentry) get_fs_pwd(current->fs, &context->pwd); } -/* audit_putname - intercept a putname request - * @name: name to intercept and delay for putname - * - * If we have stored the name from getname in the audit context, - * then we delay the putname until syscall exit. - * Called from include/linux/fs.h:putname(). - */ -void audit_putname(struct filename *name) -{ - struct audit_context *context = current->audit_context; - - BUG_ON(!context); - if (!context->in_syscall) { -#if AUDIT_DEBUG == 2 - printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n", - __FILE__, __LINE__, context->serial, name); - if (context->name_count) { - struct audit_names *n; - int i = 0; - - list_for_each_entry(n, &context->names_list, list) - printk(KERN_ERR "name[%d] = %p = %s\n", i++, - n->name, n->name->name ?: "(null)"); - } -#endif - final_putname(name); - } -#if AUDIT_DEBUG - else { - ++context->put_count; - if (context->put_count > context->name_count) { - printk(KERN_ERR "%s:%d(:%d): major=%d" - " in_syscall=%d putname(%p) name_count=%d" - " put_count=%d\n", - __FILE__, __LINE__, - context->serial, context->major, - context->in_syscall, name->name, - context->name_count, context->put_count); - dump_stack(); - } - } -#endif -} - /** * __audit_inode - store the inode and device from a lookup * @name: name being audited @@ -1781,10 +1701,6 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, if (!name) goto out_alloc; -#if AUDIT_DEBUG - /* The struct filename _must_ have a populated ->name */ - BUG_ON(!name->name); -#endif /* * If we have a pointer to an audit_names entry already, then we can * just use it directly if the type is correct. @@ -1802,8 +1718,17 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, } list_for_each_entry_reverse(n, &context->names_list, list) { - /* does the name pointer match? */ - if (!n->name || n->name->name != name->name) + if (n->ino) { + /* valid inode number, use that for the comparison */ + if (n->ino != inode->i_ino || + n->dev != inode->i_sb->s_dev) + continue; + } else if (n->name) { + /* inode number has not been set, check the name */ + if (strcmp(n->name->name, name->name)) + continue; + } else + /* no inode and no name (?!) ... this is odd ... */ continue; /* match the correct record type */ @@ -1824,6 +1749,11 @@ void __audit_inode(struct filename *name, const struct dentry *dentry, n = audit_alloc_name(context, AUDIT_TYPE_NORMAL); if (!n) return; + if (name) { + n->name = name; + name->refcnt++; + } + out: if (parent) { n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL; @@ -1869,11 +1799,16 @@ void __audit_inode_child(const struct inode *parent, /* look for a parent entry first */ list_for_each_entry(n, &context->names_list, list) { - if (!n->name || n->type != AUDIT_TYPE_PARENT) + if (!n->name || + (n->type != AUDIT_TYPE_PARENT && + n->type != AUDIT_TYPE_UNKNOWN)) continue; - if (n->ino == parent->i_ino && - !audit_compare_dname_path(dname, n->name->name, n->name_len)) { + if (n->ino == parent->i_ino && n->dev == parent->i_sb->s_dev && + !audit_compare_dname_path(dname, + n->name->name, n->name_len)) { + if (n->type == AUDIT_TYPE_UNKNOWN) + n->type = AUDIT_TYPE_PARENT; found_parent = n; break; } @@ -1882,11 +1817,8 @@ void __audit_inode_child(const struct inode *parent, /* is there a matching child entry? */ list_for_each_entry(n, &context->names_list, list) { /* can only match entries that have a name */ - if (!n->name || n->type != type) - continue; - - /* if we found a parent, make sure this one is a child of it */ - if (found_parent && (n->name != found_parent->name)) + if (!n->name || + (n->type != type && n->type != AUDIT_TYPE_UNKNOWN)) continue; if (!strcmp(dname, n->name->name) || @@ -1894,6 +1826,8 @@ void __audit_inode_child(const struct inode *parent, found_parent ? found_parent->name_len : AUDIT_NAME_FULL)) { + if (n->type == AUDIT_TYPE_UNKNOWN) + n->type = type; found_child = n; break; } @@ -1918,10 +1852,10 @@ void __audit_inode_child(const struct inode *parent, if (found_parent) { found_child->name = found_parent->name; found_child->name_len = AUDIT_NAME_FULL; - /* don't call __putname() */ - found_child->name_put = false; + found_child->name->refcnt++; } } + if (inode) audit_copy_inode(found_child, dentry, inode); else diff --git a/kernel/kmod.c b/kernel/kmod.c index 8241906c4b61a0..7adfa06fbb63ae 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -236,7 +236,7 @@ static int ____call_usermodehelper(void *data) commit_creds(new); - retval = do_execve(sub_info->path, + retval = do_execve(getname_kernel(sub_info->path), (const char __user *const __user *)sub_info->argv, (const char __user *const __user *)sub_info->envp); if (!retval) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9bae2781b329fb..3ec583253724fa 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2206,6 +2206,18 @@ unsigned long nr_running(void) return sum; } +/* + * Check if only the current task is running on the cpu. + */ +bool single_task_running(void) +{ + if (cpu_rq(smp_processor_id())->nr_running == 1) + return true; + else + return false; +} +EXPORT_SYMBOL(single_task_running); + unsigned long long nr_context_switches(void) { int i; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 458f94c27c3714..054e47f4601f49 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2040,14 +2040,55 @@ static int __net_init ovs_init_net(struct net *net) return 0; } -static void __net_exit ovs_exit_net(struct net *net) +static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, + struct list_head *head) { - struct datapath *dp, *dp_next; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + struct datapath *dp; + + list_for_each_entry(dp, &ovs_net->dps, list_node) { + int i; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + + hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { + struct netdev_vport *netdev_vport; + + if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) + continue; + + netdev_vport = netdev_vport_priv(vport); + if (dev_net(netdev_vport->dev) == dnet) + list_add(&vport->detach_list, head); + } + } + } +} + +static void __net_exit ovs_exit_net(struct net *dnet) +{ + struct datapath *dp, *dp_next; + struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); + struct vport *vport, *vport_next; + struct net *net; + LIST_HEAD(head); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); + + rtnl_lock(); + for_each_net(net) + list_vports_from_net(net, dnet, &head); + rtnl_unlock(); + + /* Detach all vports from given namespace. */ + list_for_each_entry_safe(vport, vport_next, &head, detach_list) { + list_del(&vport->detach_list); + ovs_dp_detach_port(vport); + } + ovs_unlock(); cancel_work_sync(&ovs_net->dp_notify_work); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3e784066302b99..278161514a442e 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -93,6 +93,7 @@ struct vport_portids { * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @err_stats: Points to error statistics used and maintained by vport + * @detach_list: list used for detaching vport in net-exit call. */ struct vport { struct rcu_head rcu; @@ -107,6 +108,7 @@ struct vport { struct pcpu_tstats __percpu *percpu_stats; struct vport_err_stats err_stats; + struct list_head detach_list; }; /**