From fce1bcfe577b92922cc0910f7f91c82b9d1a3ef9 Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Fri, 15 May 2015 20:18:34 +0800 Subject: [PATCH 1/2] AVX SIMD implementation for ln --- include/openvml.h | 3 ++ include/openvml_kernel.h | 6 +++ include/openvml_macros.h | 9 ++++ include/openvml_reference.h | 3 ++ interface/CMakeLists.txt | 2 +- interface/ln.c | 39 ++++++++++++++ kernel/CMakeLists.txt | 2 +- kernel/arm/Kernel_generic.txt | 3 ++ kernel/generic/Kernel_generic.txt | 3 ++ kernel/generic/ln_kernel.c | 40 ++++++++++++++ kernel/x86_64/Kernel_generic.txt | 3 ++ kernel/x86_64/Kernel_haswell.txt | 3 ++ kernel/x86_64/Kernel_sandybridge.txt | 3 ++ kernel/x86_64/dln_kernel_avx.c | 78 ++++++++++++++++++++++++++++ kernel/x86_64/sln_kernel_avx.c | 62 ++++++++++++++++++++++ reference/CMakeLists.txt | 1 + reference/vln.c | 48 +++++++++++++++++ test/CMakeLists.txt | 1 + test/test_ln.c | 55 ++++++++++++++++++++ 19 files changed, 362 insertions(+), 2 deletions(-) create mode 100644 interface/ln.c create mode 100644 kernel/generic/ln_kernel.c create mode 100644 kernel/x86_64/dln_kernel_avx.c create mode 100644 kernel/x86_64/sln_kernel_avx.c create mode 100644 reference/vln.c create mode 100644 test/test_ln.c diff --git a/include/openvml.h b/include/openvml.h index 36cdba5..b963b5f 100644 --- a/include/openvml.h +++ b/include/openvml.h @@ -55,6 +55,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME(vdExp)(VML_INT n, const double * a, double OPENVML_EXPORT void OpenVML_FUNCNAME(vsLog10)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME(vdLog10)(VML_INT n, const double * a, double * y); +OPENVML_EXPORT void OpenVML_FUNCNAME(vsLn)(VML_INT n, const float * a, float * y); +OPENVML_EXPORT void OpenVML_FUNCNAME(vdLn)(VML_INT n, const double * a, double * y); + OPENVML_EXPORT void OpenVML_FUNCNAME(vsTanh)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME(vdTanh)(VML_INT n, const double * a, double * y); diff --git a/include/openvml_kernel.h 
b/include/openvml_kernel.h index edfbf5f..109ffac 100644 --- a/include/openvml_kernel.h +++ b/include/openvml_kernel.h @@ -60,6 +60,12 @@ void OpenVML_FUNCNAME(clog10_k)(VMLLONG n, float * a, float * b, float * y, floa void OpenVML_FUNCNAME(zlog10_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); +void OpenVML_FUNCNAME(sln_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); +void OpenVML_FUNCNAME(dln_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); +void OpenVML_FUNCNAME(cln_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); +void OpenVML_FUNCNAME(zln_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); + + void OpenVML_FUNCNAME(stanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); void OpenVML_FUNCNAME(dtanh_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); void OpenVML_FUNCNAME(ctanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); diff --git a/include/openvml_macros.h b/include/openvml_macros.h index 18a4134..b91ce2a 100644 --- a/include/openvml_macros.h +++ b/include/openvml_macros.h @@ -58,6 +58,11 @@ #define CLOG10_K OpenVML_FUNCNAME(clog10_k) #define ZLOG10_K OpenVML_FUNCNAME(zlog10_k) +#define SLN_K OpenVML_FUNCNAME(sln_k) +#define DLN_K OpenVML_FUNCNAME(dln_k) +#define CLN_K OpenVML_FUNCNAME(cln_k) +#define ZLN_K OpenVML_FUNCNAME(zln_k) + #define STANH_K OpenVML_FUNCNAME(stanh_k) #define DTANH_K OpenVML_FUNCNAME(dtanh_k) @@ -72,6 +77,7 @@ #define POW_K SPOW_K #define EXP_K SEXP_K #define LOG10_K SLOG10_K +#define LN_K SLN_K #define TANH_K STANH_K #else #define ADD_K DADD_K @@ -79,6 +85,7 @@ #define POW_K DPOW_K #define EXP_K DEXP_K #define LOG10_K DLOG10_K +#define LN_K DLN_K #define TANH_K DTANH_K #endif #else @@ -88,6 +95,7 @@ #define POW_K CPOW_K #define EXP_K CEXP_K #define LOG10_K CLOG10_K 
+#define LN_K CLN_K #define TANH_K CTANH_K #else #define ADD_K ZADD_K @@ -95,6 +103,7 @@ #define POW_K ZPOW_K #define EXP_K ZEXP_K #define LOG10_K ZLOG10_K +#define LN_K ZLN_K #define TANH_K ZTANH_K #endif #endif diff --git a/include/openvml_reference.h b/include/openvml_reference.h index 43badf5..815ac21 100644 --- a/include/openvml_reference.h +++ b/include/openvml_reference.h @@ -55,6 +55,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdExp)(VML_INT n, const double * a, dou OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsLog10)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLog10)(VML_INT n, const double * a, double * y); +OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsLn)(VML_INT n, const float * a, float * y); +OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLn)(VML_INT n, const double * a, double * y); + OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsTanh)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdTanh)(VML_INT n, const double * a, double * y); diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 9ab004f..11a297a 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -5,7 +5,7 @@ set(OpenVML_LIBSRC_D "") set(OpenVML_LIBSRC_C "") set(OpenVML_LIBSRC_Z "") -set(REAL_INTERFACE_LIST add sub pow exp tanh log10) +set(REAL_INTERFACE_LIST add sub pow exp tanh log10 ln) set(COMPLEX_INTERFACE_LIST add sub) function(cap_string var_name var_name_cap) diff --git a/interface/ln.c b/interface/ln.c new file mode 100644 index 0000000..f98dbf6 --- /dev/null +++ b/interface/ln.c @@ -0,0 +1,39 @@ +/* * Copyright (c) 2014, 2015 Zhang Xianyi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include + + +void CNAME(VML_INT n, const VML_FLOAT * a, VML_FLOAT * y) { + + if (n<=0) return; + if (a==NULL || y==NULL) return; + + + EXEC_VML(0, LN_K, n, (VML_FLOAT*)a, NULL, y, NULL, NULL); + +} diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index e9d7f68..911414b 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -6,7 +6,7 @@ set(OpenVML_LIBSRC_D "") set(OpenVML_LIBSRC_C "") set(OpenVML_LIBSRC_Z "") -set(KERNEL_LIST add sub pow exp tanh log10) #s,d +set(KERNEL_LIST add sub pow exp tanh log10 ln) #s,d set(Z_KERNEL_LIST add sub) #c,z ######## s,d kernels diff --git a/kernel/arm/Kernel_generic.txt b/kernel/arm/Kernel_generic.txt index 355b610..dc95be3 100644 --- a/kernel/arm/Kernel_generic.txt +++ b/kernel/arm/Kernel_generic.txt @@ -17,5 +17,8 @@ set(exp_D_KERNEL_SOURCE generic/exp_kernel.c) set(log10_S_KERNEL_SOURCE generic/log10_kernel.c) set(log10_D_KERNEL_SOURCE generic/log10_kernel.c) +set(log10_S_KERNEL_SOURCE generic/ln_kernel.c) +set(log10_D_KERNEL_SOURCE generic/ln_kernel.c) + set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c) diff --git a/kernel/generic/Kernel_generic.txt b/kernel/generic/Kernel_generic.txt index acb2a49..06ad1f7 100644 --- a/kernel/generic/Kernel_generic.txt +++ b/kernel/generic/Kernel_generic.txt @@ -19,6 +19,9 @@ set(exp_D_KERNEL_SOURCE ${OpenVML_ARCH}/exp_kernel.c) set(log10_S_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c) set(log10_D_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c) +set(ln_S_KERNEL_SOURCE ${OpenVML_ARCH}/ln_kernel.c) +set(ln_D_KERNEL_SOURCE ${OpenVML_ARCH}/ln_kernel.c) + set(tanh_S_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c) diff --git a/kernel/generic/ln_kernel.c b/kernel/generic/ln_kernel.c new file mode 100644 index 0000000..bfce0b3 --- /dev/null +++ b/kernel/generic/ln_kernel.c @@ -0,0 +1,40 @@ +/* * Copyright (c) 2014, 2015 Zhang Xianyi + * All rights 
reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include "openvml_kernel.h" + +#ifndef DOUBLE +#define LN logf +#else +#define LN log +#endif + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG i=0; + for(i=0; i +#include "openvml_kernel.h" + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG loop_count=(COMPSIZE*n) >> 5; + VMLLONG remain_count=(COMPSIZE*n) & 0x1f; + + int i=0; + + while(loop_count>0){ + + __m256d av0=_mm256_loadu_pd(a); + __m256d av1=_mm256_loadu_pd(a+4); + __m256d av2=_mm256_loadu_pd(a+8); + __m256d av3=_mm256_loadu_pd(a+12); + + __m256d av4=_mm256_loadu_pd(a+16); + __m256d av5=_mm256_loadu_pd(a+20); + __m256d av6=_mm256_loadu_pd(a+24); + __m256d av7=_mm256_loadu_pd(a+28); + + + + + __m256d yv0=_mm256_log_pd(av0); + __m256d yv1=_mm256_log_pd(av1); + __m256d yv2=_mm256_log_pd(av2); + __m256d yv3=_mm256_log_pd(av3); + + __m256d yv4=_mm256_log_pd(av4); + __m256d yv5=_mm256_log_pd(av5); + __m256d yv6=_mm256_log_pd(av6); + __m256d yv7=_mm256_log_pd(av7); + + _mm256_storeu_pd(y, yv0); + _mm256_storeu_pd(y+4, yv1); + _mm256_storeu_pd(y+8, yv2); + _mm256_storeu_pd(y+12, yv3); + + _mm256_storeu_pd(y+16, yv4); + _mm256_storeu_pd(y+20, yv5); + _mm256_storeu_pd(y+24, yv6); + _mm256_storeu_pd(y+28, yv7); + + a+=32; + y+=32; + loop_count--; + } + + for(i=0; i +#include "openvml_kernel.h" + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG loop_count=(COMPSIZE*n) >> 5; + VMLLONG remain_count=(COMPSIZE*n) & 0x1f; + + int i=0; + + while(loop_count>0){ + + __m256 av0=_mm256_loadu_ps(a); + __m256 av1=_mm256_loadu_ps(a+8); + __m256 av2=_mm256_loadu_ps(a+16); + __m256 av3=_mm256_loadu_ps(a+24); + + + __m256 yv0=_mm256_log_ps(av0); + __m256 yv1=_mm256_log_ps(av1); + __m256 yv2=_mm256_log_ps(av2); + __m256 yv3=_mm256_log_ps(av3); + + + _mm256_storeu_ps(y, yv0); 
+ _mm256_storeu_ps(y+8, yv1); + _mm256_storeu_ps(y+16, yv2); + _mm256_storeu_ps(y+24, yv3); + + a+=32; + y+=32; + loop_count--; + } + + for(i=0; i +#include +#include + +void OpenVML_FUNCNAME_REF(vsLn)(VML_INT n, const float * a, float * y){ + VML_INT i; + if (n<=0) return; + if (a==NULL || y==NULL) return; + + for(i=0; i +#include +#include + +static char* funcname[4]={"vsLn", "vdLn", NULL,NULL}; +static double flop_per_elem[4]={0.0, 0.0, 0.0, 0.0}; + +static a_y_func_t ref_vLn[] = { + (a_y_func_t)OpenVML_FUNCNAME_REF(vsLn), + (a_y_func_t)OpenVML_FUNCNAME_REF(vdLn), + NULL, + NULL, +}; + +static a_y_func_t test_vLn[] = { + (a_y_func_t)OpenVML_FUNCNAME(vsLn), + (a_y_func_t)OpenVML_FUNCNAME(vdLn), + NULL, + NULL, +}; + + +CTEST2(check_result_s, ln){ + run_test_a_y(data->parameter, funcname, test_vLn, ref_vLn, flop_per_elem); +} + +CTEST2(check_result_d, ln){ + run_test_a_y(data->parameter, funcname, test_vLn, ref_vLn, flop_per_elem); +} From e175968d2e37abb5fab121df33f234e9e3c4ec10 Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Sun, 17 May 2015 17:34:39 +0800 Subject: [PATCH 2/2] AVX implementation of (s/d)log1p --- include/openvml.h | 3 ++ include/openvml_kernel.h | 6 +++ include/openvml_macros.h | 8 +++ include/openvml_reference.h | 3 ++ interface/CMakeLists.txt | 2 +- interface/log1p.c | 39 ++++++++++++++ kernel/CMakeLists.txt | 2 +- kernel/aarch64/Kernel_generic.txt | 7 ++- kernel/arm/Kernel_generic.txt | 3 ++ kernel/generic/Kernel_generic.txt | 4 +- kernel/generic/log1p_kernel.c | 40 ++++++++++++++ kernel/x86_64/Kernel_generic.txt | 3 ++ kernel/x86_64/Kernel_haswell.txt | 3 ++ kernel/x86_64/Kernel_sandybridge.txt | 3 ++ kernel/x86_64/dlog1p_kernel_avx.c | 78 ++++++++++++++++++++++++++++ kernel/x86_64/slog1p_kernel_avx.c | 63 ++++++++++++++++++++++ reference/CMakeLists.txt | 1 + reference/vlog1p.c | 48 +++++++++++++++++ test/CMakeLists.txt | 1 + test/test_log1p.c | 55 ++++++++++++++++++++ 20 files changed, 368 insertions(+), 4 deletions(-) create mode 100644 
interface/log1p.c create mode 100644 kernel/generic/log1p_kernel.c create mode 100644 kernel/x86_64/dlog1p_kernel_avx.c create mode 100644 kernel/x86_64/slog1p_kernel_avx.c create mode 100644 reference/vlog1p.c create mode 100644 test/test_log1p.c diff --git a/include/openvml.h b/include/openvml.h index b963b5f..e7df1d5 100644 --- a/include/openvml.h +++ b/include/openvml.h @@ -58,6 +58,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME(vdLog10)(VML_INT n, const double * a, doubl OPENVML_EXPORT void OpenVML_FUNCNAME(vsLn)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME(vdLn)(VML_INT n, const double * a, double * y); +OPENVML_EXPORT void OpenVML_FUNCNAME(vsLog1p)(VML_INT n, const float * a, float * y); +OPENVML_EXPORT void OpenVML_FUNCNAME(vdLog1p)(VML_INT n, const double * a, double * y); + OPENVML_EXPORT void OpenVML_FUNCNAME(vsTanh)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME(vdTanh)(VML_INT n, const double * a, double * y); diff --git a/include/openvml_kernel.h b/include/openvml_kernel.h index 109ffac..aec202c 100644 --- a/include/openvml_kernel.h +++ b/include/openvml_kernel.h @@ -66,6 +66,12 @@ void OpenVML_FUNCNAME(cln_k)(VMLLONG n, float * a, float * b, float * y, float * void OpenVML_FUNCNAME(zln_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); +void OpenVML_FUNCNAME(slog1p_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); +void OpenVML_FUNCNAME(dlog1p_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); +void OpenVML_FUNCNAME(clog1p_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); +void OpenVML_FUNCNAME(zlog1p_k)(VMLLONG n, double * a, double * b, double * y, double * z, double * other_params); + + void OpenVML_FUNCNAME(stanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); void OpenVML_FUNCNAME(dtanh_k)(VMLLONG n, double * a, double * b, 
double * y, double * z, double * other_params); void OpenVML_FUNCNAME(ctanh_k)(VMLLONG n, float * a, float * b, float * y, float * z, float * other_params); diff --git a/include/openvml_macros.h b/include/openvml_macros.h index b91ce2a..9d9d1e7 100644 --- a/include/openvml_macros.h +++ b/include/openvml_macros.h @@ -63,6 +63,10 @@ #define CLN_K OpenVML_FUNCNAME(cln_k) #define ZLN_K OpenVML_FUNCNAME(zln_k) +#define SLOG1P_K OpenVML_FUNCNAME(slog1p_k) +#define DLOG1P_K OpenVML_FUNCNAME(dlog1p_k) +#define CLOG1P_K OpenVML_FUNCNAME(clog1p_k) +#define ZLOG1P_K OpenVML_FUNCNAME(zlog1p_k) #define STANH_K OpenVML_FUNCNAME(stanh_k) #define DTANH_K OpenVML_FUNCNAME(dtanh_k) @@ -78,6 +82,7 @@ #define EXP_K SEXP_K #define LOG10_K SLOG10_K #define LN_K SLN_K +#define LOG1P_K SLOG1P_K #define TANH_K STANH_K #else #define ADD_K DADD_K @@ -86,6 +91,7 @@ #define EXP_K DEXP_K #define LOG10_K DLOG10_K #define LN_K DLN_K +#define LOG1P_K DLOG1P_K #define TANH_K DTANH_K #endif #else @@ -96,6 +102,7 @@ #define EXP_K CEXP_K #define LOG10_K CLOG10_K #define LN_K CLN_K +#define LOG1P_K CLOG1P_K #define TANH_K CTANH_K #else #define ADD_K ZADD_K @@ -104,6 +111,7 @@ #define EXP_K ZEXP_K #define LOG10_K ZLOG10_K #define LN_K ZLN_K +#define LOG1P_K ZLOG1P_K #define TANH_K ZTANH_K #endif #endif diff --git a/include/openvml_reference.h b/include/openvml_reference.h index 815ac21..b368312 100644 --- a/include/openvml_reference.h +++ b/include/openvml_reference.h @@ -58,6 +58,9 @@ OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLog10)(VML_INT n, const double * a, d OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsLn)(VML_INT n, const float * a, float * y); OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLn)(VML_INT n, const double * a, double * y); +OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsLog1p)(VML_INT n, const float * a, float * y); +OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdLog1p)(VML_INT n, const double * a, double * y); + OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vsTanh)(VML_INT n, const float * a, float * 
y); OPENVML_EXPORT void OpenVML_FUNCNAME_REF(vdTanh)(VML_INT n, const double * a, double * y); diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 11a297a..a96359c 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -5,7 +5,7 @@ set(OpenVML_LIBSRC_D "") set(OpenVML_LIBSRC_C "") set(OpenVML_LIBSRC_Z "") -set(REAL_INTERFACE_LIST add sub pow exp tanh log10 ln) +set(REAL_INTERFACE_LIST add sub pow exp tanh log10 ln log1p) set(COMPLEX_INTERFACE_LIST add sub) function(cap_string var_name var_name_cap) diff --git a/interface/log1p.c b/interface/log1p.c new file mode 100644 index 0000000..9c0b516 --- /dev/null +++ b/interface/log1p.c @@ -0,0 +1,39 @@ +/* * Copyright (c) 2014, 2015 Zhang Xianyi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + + +void CNAME(VML_INT n, const VML_FLOAT * a, VML_FLOAT * y) { + + if (n<=0) return; + if (a==NULL || y==NULL) return; + + + EXEC_VML(0, LOG1P_K, n, (VML_FLOAT*)a, NULL, y, NULL, NULL); + +} diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 911414b..cd3f0b1 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -6,7 +6,7 @@ set(OpenVML_LIBSRC_D "") set(OpenVML_LIBSRC_C "") set(OpenVML_LIBSRC_Z "") -set(KERNEL_LIST add sub pow exp tanh log10 ln) #s,d +set(KERNEL_LIST add sub pow exp tanh log10 ln log1p) #s,d set(Z_KERNEL_LIST add sub) #c,z ######## s,d kernels diff --git a/kernel/aarch64/Kernel_generic.txt b/kernel/aarch64/Kernel_generic.txt index 5645f5a..ab3d76d 100644 --- a/kernel/aarch64/Kernel_generic.txt +++ b/kernel/aarch64/Kernel_generic.txt @@ -17,6 +17,11 @@ set(exp_D_KERNEL_SOURCE generic/exp_kernel.c) set(log10_S_KERNEL_SOURCE generic/log10_kernel.c) set(log10_D_KERNEL_SOURCE generic/log10_kernel.c) +set(ln_S_KERNEL_SOURCE generic/ln_kernel.c) +set(ln_D_KERNEL_SOURCE generic/ln_kernel.c) + +set(log1p_S_KERNEL_SOURCE generic/log1p_kernel.c) +set(log1p_D_KERNEL_SOURCE generic/log1p_kernel.c) + set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c) - diff --git a/kernel/arm/Kernel_generic.txt b/kernel/arm/Kernel_generic.txt index dc95be3..70922dd 100644 --- a/kernel/arm/Kernel_generic.txt +++ b/kernel/arm/Kernel_generic.txt 
@@ -20,5 +20,8 @@ set(log10_D_KERNEL_SOURCE generic/log10_kernel.c) -set(log10_S_KERNEL_SOURCE generic/ln_kernel.c) -set(log10_D_KERNEL_SOURCE generic/ln_kernel.c) +set(ln_S_KERNEL_SOURCE generic/ln_kernel.c) +set(ln_D_KERNEL_SOURCE generic/ln_kernel.c) +set(log1p_S_KERNEL_SOURCE generic/log1p_kernel.c) +set(log1p_D_KERNEL_SOURCE generic/log1p_kernel.c) + set(tanh_S_KERNEL_SOURCE generic/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE generic/tanh_kernel.c) diff --git a/kernel/generic/Kernel_generic.txt b/kernel/generic/Kernel_generic.txt index 06ad1f7..158c62d 100644 --- a/kernel/generic/Kernel_generic.txt +++ b/kernel/generic/Kernel_generic.txt @@ -22,6 +22,8 @@ set(log10_D_KERNEL_SOURCE ${OpenVML_ARCH}/log10_kernel.c) set(ln_S_KERNEL_SOURCE ${OpenVML_ARCH}/ln_kernel.c) set(ln_D_KERNEL_SOURCE ${OpenVML_ARCH}/ln_kernel.c) +set(log1p_S_KERNEL_SOURCE ${OpenVML_ARCH}/log1p_kernel.c) +set(log1p_D_KERNEL_SOURCE ${OpenVML_ARCH}/log1p_kernel.c) + set(tanh_S_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c) set(tanh_D_KERNEL_SOURCE ${OpenVML_ARCH}/tanh_kernel.c) - diff --git a/kernel/generic/log1p_kernel.c b/kernel/generic/log1p_kernel.c new file mode 100644 index 0000000..a32a829 --- /dev/null +++ b/kernel/generic/log1p_kernel.c @@ -0,0 +1,40 @@ +/* * Copyright (c) 2014, 2015 Zhang Xianyi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, this + * list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "openvml_kernel.h" + +#ifndef DOUBLE +#define LOG1P log1pf +#else +#define LOG1P log1p +#endif + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG i=0; + for(i=0; i +#include "openvml_kernel.h" + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG loop_count=(COMPSIZE*n) >> 5; + VMLLONG remain_count=(COMPSIZE*n) & 0x1f; + + int i=0; + + while(loop_count>0){ + + __m256d av0=_mm256_loadu_pd(a); + __m256d av1=_mm256_loadu_pd(a+4); + __m256d av2=_mm256_loadu_pd(a+8); + __m256d av3=_mm256_loadu_pd(a+12); + + __m256d av4=_mm256_loadu_pd(a+16); + __m256d av5=_mm256_loadu_pd(a+20); + __m256d av6=_mm256_loadu_pd(a+24); + __m256d av7=_mm256_loadu_pd(a+28); + + + + + __m256d yv0=_mm256_log1p_pd(av0); + __m256d yv1=_mm256_log1p_pd(av1); + __m256d yv2=_mm256_log1p_pd(av2); + __m256d yv3=_mm256_log1p_pd(av3); + + __m256d yv4=_mm256_log1p_pd(av4); + __m256d yv5=_mm256_log1p_pd(av5); + __m256d yv6=_mm256_log1p_pd(av6); + __m256d yv7=_mm256_log1p_pd(av7); + + _mm256_storeu_pd(y, yv0); + _mm256_storeu_pd(y+4, yv1); + _mm256_storeu_pd(y+8, yv2); + _mm256_storeu_pd(y+12, yv3); + + _mm256_storeu_pd(y+16, yv4); + _mm256_storeu_pd(y+20, yv5); + _mm256_storeu_pd(y+24, yv6); + _mm256_storeu_pd(y+28, yv7); + + a+=32; + y+=32; + loop_count--; + } + + for(i=0; i +#include 
"openvml_kernel.h" + +void KERNEL_NAME(VMLLONG n, VML_FLOAT * a, VML_FLOAT * b, VML_FLOAT * y, VML_FLOAT * z, VML_FLOAT * other_params) { + VMLLONG loop_count=(COMPSIZE*n) >> 5; + VMLLONG remain_count=(COMPSIZE*n) & 0x1f; + + int i=0; + + while(loop_count>0){ + + __m256 av0=_mm256_loadu_ps(a); + __m256 av1=_mm256_loadu_ps(a+8); + __m256 av2=_mm256_loadu_ps(a+16); + __m256 av3=_mm256_loadu_ps(a+24); + + + __m256 yv0=_mm256_log1p_ps(av0); + __m256 yv1=_mm256_log1p_ps(av1); + __m256 yv2=_mm256_log1p_ps(av2); + __m256 yv3=_mm256_log1p_ps(av3); + + + _mm256_storeu_ps(y, yv0); + _mm256_storeu_ps(y+8, yv1); + _mm256_storeu_ps(y+16, yv2); + _mm256_storeu_ps(y+24, yv3); + + a+=32; + y+=32; + loop_count--; + } + + for(i=0; i +#include +#include + +void OpenVML_FUNCNAME_REF(vsLog1p)(VML_INT n, const float * a, float * y){ + VML_INT i; + if (n<=0) return; + if (a==NULL || y==NULL) return; + + for(i=0; i +#include +#include + +static char* funcname[4]={"vsLog1p", "vdLog1p", NULL,NULL}; +static double flop_per_elem[4]={0.0, 0.0, 0.0, 0.0}; + +static a_y_func_t ref_vLog1p[] = { + (a_y_func_t)OpenVML_FUNCNAME_REF(vsLog1p), + (a_y_func_t)OpenVML_FUNCNAME_REF(vdLog1p), + NULL, + NULL, +}; + +static a_y_func_t test_vLog1p[] = { + (a_y_func_t)OpenVML_FUNCNAME(vsLog1p), + (a_y_func_t)OpenVML_FUNCNAME(vdLog1p), + NULL, + NULL, +}; + + +CTEST2(check_result_s, log1p){ + run_test_a_y(data->parameter, funcname, test_vLog1p, ref_vLog1p, flop_per_elem); +} + +CTEST2(check_result_d, log1p){ + run_test_a_y(data->parameter, funcname, test_vLog1p, ref_vLog1p, flop_per_elem); +}