From 2992e3886aa6304ac2715890f4fbd8548e891c53 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Mon, 22 Oct 2018 23:21:49 +0300 Subject: [PATCH 01/17] disable threading in C/ZSWAP copying from S/DSWAP --- interface/zswap.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/interface/zswap.c b/interface/zswap.c index e33bbafbac..372b15447a 100644 --- a/interface/zswap.c +++ b/interface/zswap.c @@ -42,6 +42,14 @@ #include "functable.h" #endif +#if defined(THUNDERX2T99) || defined(VULCAN) || defined(ARMV8) +// Multithreaded swap gives performance benefits in ThunderX2T99 +#else +// Disable multi-threading as it does not show any performance +// benefits. Keep the multi-threading code for the record. +#undef SMP +#endif + #ifndef CBLAS void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){ @@ -81,7 +89,7 @@ FLOAT *y = (FLOAT*)vy; #ifdef SMP //disable multi-thread when incx==0 or incy==0 //In that case, the threads would be dependent. - if (incx == 0 || incy == 0) + if (incx == 0 || incy == 0 || n < 1048576 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT)) nthreads = 1; else nthreads = num_cpu_avail(1); From 64ca44873bd9d960c63456a43fd565c56514e895 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 28 Oct 2018 18:36:55 +0100 Subject: [PATCH 02/17] Fix detection of Ryzen2 (missing CORE_ZEN) --- cpuid_x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpuid_x86.c b/cpuid_x86.c index 512ad877bb..8e4a7cb84b 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -2009,6 +2009,8 @@ int get_coretype(void){ switch (model) { case 1: // AMD Ryzen + case 8: + // Ryzen 2 if(support_avx()) #ifndef NO_AVX2 return CORE_ZEN; From 38cf5d93647bf5ffb5fe3e17447eba0c157bb305 Mon Sep 17 00:00:00 2001 From: "Erik M. 
Bray" Date: Sun, 28 Oct 2018 21:16:52 +0000 Subject: [PATCH 03/17] ensure that threading has been initialized in the first place before calling openblas_set_num_threads --- driver/others/blas_server.c | 5 +++++ driver/others/blas_server_win32.c | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 6a25e2d073..e5db1804f9 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -850,6 +850,11 @@ void goto_set_num_threads(int num_threads) { long i; +#ifdef SMP_SERVER + // Handle lazy re-init of the thread-pool after a POSIX fork + if (unlikely(blas_server_avail == 0)) blas_thread_init(); +#endif + if (num_threads < 1) num_threads = blas_num_threads; #ifndef NO_AFFINITY diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c index 02a25ac397..bae344c593 100644 --- a/driver/others/blas_server_win32.c +++ b/driver/others/blas_server_win32.c @@ -478,7 +478,12 @@ int BLASFUNC(blas_thread_shutdown)(void){ void goto_set_num_threads(int num_threads) { - long i; + long i; + +#if defined(SMP_SERVER) && defined(OS_CYGWIN_NT) + // Handle lazy re-init of the thread-pool after a POSIX fork + if (unlikely(blas_server_avail == 0)) blas_thread_init(); +#endif if (num_threads < 1) num_threads = blas_cpu_number; From 326d394a0fbcc8226bb958f523ca1005696c33b6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 31 Oct 2018 18:38:22 +0100 Subject: [PATCH 04/17] Add get_num_procs implementation for AIX (and copy HAIKU implementation to the non-TLS version of the code as well) --- driver/others/memory.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/driver/others/memory.c b/driver/others/memory.c index 4a8e6c0675..25f198623b 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -259,6 +259,16 @@ int get_num_procs(void) { } #endif +#ifdef OS_AIX +int get_num_procs(void) { + static int nums = 0; + if (!nums) nums = 
sysconf(_SC_NPROCESSORS_CONF); + return nums; +} +#endif + + + #ifdef OS_WINDOWS int get_num_procs(void) { @@ -1738,6 +1748,22 @@ int get_num_procs(void) { return nums; } #endif + +#ifdef OS_HAIKU +int get_num_procs(void) { + static int nums = 0; + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); + return nums; +} +#endif + +#ifdef OS_AIX +int get_num_procs(void) { + static int nums = 0; + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); + return nums; +} +#endif #ifdef OS_WINDOWS From 7b5aea52bb105c15d7e80e0749b80f6bfb0566b6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 31 Oct 2018 21:50:34 +0100 Subject: [PATCH 05/17] Accommodate AIX install, which has different syntax for #1803 --- Makefile.install | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Makefile.install b/Makefile.install index fa657beba1..7aa477cf04 100644 --- a/Makefile.install +++ b/Makefile.install @@ -48,6 +48,7 @@ ifndef NO_CBLAS @sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h" endif +ifneq (($OSNAME), AIX) ifndef NO_LAPACKE @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" @@ -72,6 +73,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku)) ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) endif + ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly)) @cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ @@ -93,6 +95,33 @@ ifeq ($(OSNAME), CYGWIN_NT) endif endif +else +#install on AIX has different options syntax +ifndef NO_LAPACKE + @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) + @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" + @-install -M 644 
$(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" + @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" + @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" +endif + +#for install static library +ifndef NO_STATIC + @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) + @install -M 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" + @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ + ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) +endif +#for install shared library +ifndef NO_SHARED + @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) + @install -M 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" + @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ + ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ + ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) +endif + +endif #Generating openblas.pc @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" From dcc5d6291e7b02761acfb6161c04ba1f8f25b502 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 1 Nov 2018 01:42:09 +0000 Subject: [PATCH 06/17] skylakex: Make the sgemm/dgemm beta code robust for a N=0 or M=0 case in the threading code there are cases where N or M can become 0, and the optimized beta code did not handle this well, leading to a crash during the audit for the crash a few edge conditions on the if statements were found and fixed as well --- kernel/x86_64/dgemm_beta_skylakex.c | 6 ++++-- kernel/x86_64/sgemm_beta_skylakex.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/x86_64/dgemm_beta_skylakex.c b/kernel/x86_64/dgemm_beta_skylakex.c index 384e9f60ba..6a824c9b50 100644 --- a/kernel/x86_64/dgemm_beta_skylakex.c +++ b/kernel/x86_64/dgemm_beta_skylakex.c @@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT 
beta, return 0; } + if (m == 0 || n == 0) + return 0; c_offset = c; @@ -69,7 +71,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, i = m; - while (i > 32) { + while (i >= 32) { _mm512_storeu_pd(c_offset1, z_zero); _mm512_storeu_pd(c_offset1 + 8, z_zero); _mm512_storeu_pd(c_offset1 + 16, z_zero); @@ -77,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, c_offset1 += 32; i -= 32; } - while (i > 8) { + while (i >= 8) { _mm512_storeu_pd(c_offset1, z_zero); c_offset1 += 8; i -= 8; diff --git a/kernel/x86_64/sgemm_beta_skylakex.c b/kernel/x86_64/sgemm_beta_skylakex.c index 54f9664e9d..4e40acadf8 100644 --- a/kernel/x86_64/sgemm_beta_skylakex.c +++ b/kernel/x86_64/sgemm_beta_skylakex.c @@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, return 0; } + if (n == 0 || m == 0) + return 0; c_offset = c; @@ -71,13 +73,13 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, i = m; - while (i > 32) { + while (i >= 32) { _mm512_storeu_ps(c_offset1, z_zero); _mm512_storeu_ps(c_offset1 + 16, z_zero); c_offset1 += 32; i -= 32; } - while (i > 8) { + while (i >= 8) { _mm256_storeu_ps(c_offset1, y_zero); c_offset1 += 8; i -= 8; From 5b708e5eb1b17af9c45e0da2993da8a4756cb912 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 1 Nov 2018 01:43:20 +0000 Subject: [PATCH 07/17] sgemm/dgemm: add a way for an arch kernel to specify prefered sizes The current gemm threading code can make very unfortunate choices, for example on my 10 core system a 1024x1024x1024 matrix multiply ends up chunking into blocks of 102... which is not a vector friendly size and performance ends up horrible. this patch adds a helper define where an architecture can specify a preference for size multiples. This is different from existing defines that are minimum sizes and such. The performance increase with this patch for the 1024x1024x1024 sgemm is 2.3x (!!) 
--- driver/level3/level3_thread.c | 22 ++++++++++++++++++++++ param.h | 1 + 2 files changed, 23 insertions(+) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index aeb5e6ed4e..de29247d4e 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -48,6 +48,10 @@ #define SWITCH_RATIO 2 #endif +#ifndef GEMM_PREFERED_SIZE +#define GEMM_PREFERED_SIZE 1 +#endif + //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. #if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD @@ -510,6 +514,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, return 0; } +static int round_up(int remainder, int width, int multiple) +{ + if (multiple > remainder || width <= multiple) + return width; + width = (width + multiple - 1) / multiple; + width = width * multiple; + return width; +} + + static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG nthreads_m, BLASLONG nthreads_n) { @@ -601,9 +615,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG num_parts = 0; while (m > 0){ width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts); + + width = round_up(m, width, GEMM_PREFERED_SIZE); + m -= width; + if (m < 0) width = width + m; range_M[num_parts + 1] = range_M[num_parts] + width; + num_parts ++; } for (i = num_parts; i < MAX_CPU_NUMBER; i++) { @@ -645,9 +664,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG if (width < SWITCH_RATIO) { width = SWITCH_RATIO; } + width = round_up(n, width, GEMM_PREFERED_SIZE); + n -= width; if (n < 0) width = width + n; range_N[num_parts + 1] = range_N[num_parts] + width; + num_parts ++; } for (j = num_parts; j < MAX_CPU_NUMBER; j++) { diff --git a/param.h b/param.h index e4ec1b2b53..d1b2115845 100644 --- a/param.h +++ b/param.h @@ -1627,6 +1627,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 8 #define SWITCH_RATIO 32 +#define GEMM_PREFERED_SIZE 32 #ifdef ARCH_X86 From b0255231979ac40444fea06bc8958731fdcdef7a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 1 Nov 2018 18:26:08 +0100 Subject: [PATCH 08/17] Use installbsd on AIX (and fix misplaced parenthesis from previous commit). See #1803 --- Makefile.install | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Makefile.install b/Makefile.install index 7aa477cf04..069c96c6aa 100644 --- a/Makefile.install +++ b/Makefile.install @@ -48,7 +48,7 @@ ifndef NO_CBLAS @sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h" endif -ifneq (($OSNAME), AIX) +ifneq ($(OSNAME), AIX) ifndef NO_LAPACKE @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) @-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" @@ -99,23 +99,23 @@ else #install on AIX has different options syntax ifndef NO_LAPACKE @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) - @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" - @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" - @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" - @-install -M 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" + @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" + @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" + @-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" + 
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" endif #for install static library ifndef NO_STATIC @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) - @install -M 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" + @installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) endif #for install shared library ifndef NO_SHARED @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) - @install -M 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" + @installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" @cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) From 9c177d270b7ae78c4542a15ec02d8cab9cc7f367 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 1 Nov 2018 18:50:25 +0100 Subject: [PATCH 09/17] Restore Android/ARMv7 build fix from #778 for #1811 --- lapack-netlib/LAPACKE/include/lapacke_config.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lapack-netlib/LAPACKE/include/lapacke_config.h b/lapack-netlib/LAPACKE/include/lapacke_config.h index 1e2509bf01..8262c3488b 100644 --- a/lapack-netlib/LAPACKE/include/lapacke_config.h +++ b/lapack-netlib/LAPACKE/include/lapacke_config.h @@ -34,6 +34,13 @@ #ifndef _LAPACKE_CONFIG_H_ #define _LAPACKE_CONFIG_H_ +// For Android prior to API 21 (no include) +#if defined(__ANDROID__) +#if __ANDROID_API__ < 21 +#define LAPACK_COMPLEX_STRUCTURE +#endif +#endif + #ifdef __cplusplus #if defined(LAPACK_COMPLEX_CPP) #include <complex> From fb5b2177ca794f81f85530f223dd630e147092ca Mon Sep 17 00:00:00 2001 From: Renato Golin Date: Mon, 5 Nov 2018 11:30:12 +0000 Subject: [PATCH 10/17] [Arm64] Revert A53 detection as A57 This patch reverts the decision of treating A53 like A57, which was based on an analysis done on server class 
hardware and is not representative of all A53s out there. Fixes #1855. --- cpuid_arm64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index 17078fe7fd..3acb395b57 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -115,8 +115,8 @@ int detect(void) fclose(infile); if(cpu_part != NULL && cpu_implementer != NULL) { if (strstr(cpu_implementer, "0x41") && - (strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08") || strstr(cpu_part,"0xd03") )) - return CPU_CORTEXA57; //or compatible A53, A72 + (strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08"))) + return CPU_CORTEXA57; //or compatible, ex. A72 else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42")) return CPU_VULCAN; else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43")) From 066f8065d19c5d0e9525e9ccf99e6dac9712dffa Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Tue, 6 Nov 2018 08:19:08 +0000 Subject: [PATCH 11/17] init From 7d3502b5003ad54903b7a9e9aec5a853dfbe0221 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Tue, 6 Nov 2018 08:20:55 +0000 Subject: [PATCH 12/17] Add -frecursive gfortran option by default --- Makefile.rule | 4 ++-- Makefile.system | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index 6522b07771..d97607f2ec 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -192,8 +192,8 @@ NO_AFFINITY = 1 # Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT # COMMON_OPT = -O2 -# gfortran option for LAPACK -# enable this flag only on 64bit Linux and if you need a thread safe lapack library +# gfortran option for LAPACK to improve thread-safety +# It is enabled by default in Makefile.system for gfortran # Flags for POWER8 are defined in Makefile.power. 
Don't modify FCOMMON_OPT # FCOMMON_OPT = -frecursive diff --git a/Makefile.system b/Makefile.system index b4cd4222a4..8de0b8f6e6 100644 --- a/Makefile.system +++ b/Makefile.system @@ -718,6 +718,8 @@ endif ifeq ($(F_COMPILER), GFORTRAN) CCOMMON_OPT += -DF_INTERFACE_GFORT FCOMMON_OPT += -Wall +# make single-threaded LAPACK calls thread-safe #1847 +FCOMMON_OPT += -frecursive #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc ifneq ($(NO_LAPACK), 1) EXTRALIB += -lgfortran From a931afe269efc21a6710376254fb14d7bed085d8 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Tue, 6 Nov 2018 09:39:05 +0000 Subject: [PATCH 13/17] init From 3fd41313fc2c36ea55a5e3aaf02cf2734f2d18c5 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Tue, 6 Nov 2018 09:40:13 +0000 Subject: [PATCH 14/17] add low bound for number of buffers --- common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.h b/common.h index 6c3d5b15e4..60da2416ad 100644 --- a/common.h +++ b/common.h @@ -183,7 +183,7 @@ extern "C" { #define ALLOCA_ALIGN 63UL -#define NUM_BUFFERS (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER) +#define NUM_BUFFERS MAX(64,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)) #ifdef NEEDBUNDERSCORE #define BLASFUNC(FUNC) FUNC##_ From 40cce0e353ca21ed1d045b4fc58faddd2ff6c2a7 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Tue, 6 Nov 2018 09:45:49 +0000 Subject: [PATCH 15/17] handle cmake too --- cmake/fc.cmake | 2 +- common.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/fc.cmake b/cmake/fc.cmake index 38d59f9561..adec28a912 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -44,7 +44,7 @@ endif () if (${F_COMPILER} STREQUAL "GFORTRAN") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") - set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive") #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc if (NOT 
NO_LAPACK) set(EXTRALIB "{EXTRALIB} -lgfortran") diff --git a/common.h b/common.h index 6c3d5b15e4..60da2416ad 100644 --- a/common.h +++ b/common.h @@ -183,7 +183,7 @@ extern "C" { #define ALLOCA_ALIGN 63UL -#define NUM_BUFFERS (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER) +#define NUM_BUFFERS MAX(64,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)) #ifdef NEEDBUNDERSCORE #define BLASFUNC(FUNC) FUNC##_ From 9531d0e1757dc0edd64c5c439d65fb236195410a Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Tue, 6 Nov 2018 17:51:24 +0000 Subject: [PATCH 16/17] lets fit it in one 4k page --- common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.h b/common.h index 60da2416ad..7fcd5e3163 100644 --- a/common.h +++ b/common.h @@ -183,7 +183,7 @@ extern "C" { #define ALLOCA_ALIGN 63UL -#define NUM_BUFFERS MAX(64,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)) +#define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)) #ifdef NEEDBUNDERSCORE #define BLASFUNC(FUNC) FUNC##_ From cfb0f5b0f82e67cf3cc854c8319ddb79ecd1366c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 8 Nov 2018 22:39:10 +0100 Subject: [PATCH 17/17] Set LIBSONAME suffix to .a for AIX another fix for #1803 --- Makefile.system | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.system b/Makefile.system index 7847c75251..716bd18e26 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1211,7 +1211,11 @@ endif LIBDLLNAME = $(LIBPREFIX).dll IMPLIBNAME = lib$(LIBNAMEBASE).dll.a +ifneq ($(OSNAME), AIX) LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so) +else +LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a) +endif LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib) LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def) LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)