Skip to content

Commit

Permalink
Merge pull request #2 from xianyi/develop
Browse files Browse the repository at this point in the history
merge develop
  • Loading branch information
martin-frbg authored Nov 10, 2018
2 parents 76a66ea + b0c15ba commit 4f43668
Show file tree
Hide file tree
Showing 16 changed files with 127 additions and 12 deletions.
29 changes: 29 additions & 0 deletions Makefile.install
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ ifndef NO_CBLAS
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
endif

ifneq ($(OSNAME), AIX)
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
Expand All @@ -72,6 +73,7 @@ ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif

ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
Expand All @@ -93,6 +95,33 @@ ifeq ($(OSNAME), CYGWIN_NT)
endif
endif

else
#install on AIX has different options syntax
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
endif

#for install static library
ifndef NO_STATIC
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif

endif

#Generating openblas.pc
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
Expand Down
4 changes: 2 additions & 2 deletions Makefile.rule
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ NO_AFFINITY = 1
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
# COMMON_OPT = -O2

# gfortran option for LAPACK
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
# gfortran option for LAPACK to improve thread-safety
# It is enabled by default in Makefile.system for gfortran
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
# FCOMMON_OPT = -frecursive

Expand Down
6 changes: 6 additions & 0 deletions Makefile.system
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,8 @@ endif
ifeq ($(F_COMPILER), GFORTRAN)
CCOMMON_OPT += -DF_INTERFACE_GFORT
FCOMMON_OPT += -Wall
# make single-threaded LAPACK calls thread-safe #1847
FCOMMON_OPT += -frecursive
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
ifneq ($(NO_LAPACK), 1)
EXTRALIB += -lgfortran
Expand Down Expand Up @@ -1211,7 +1213,11 @@ endif

LIBDLLNAME = $(LIBPREFIX).dll
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
# Shared-library file name, derived from LIBNAME by swapping its suffix.
# AIX gets a .a suffix here; every other OS gets .so.
ifeq ($(OSNAME), AIX)
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
else
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
endif
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
Expand Down
2 changes: 1 addition & 1 deletion cmake/fc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ endif ()

if (${F_COMPILER} STREQUAL "GFORTRAN")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
if (NOT NO_LAPACK)
# Append -lgfortran to the libraries already collected in EXTRALIB.
# The variable reference needs the leading '$'; without it the literal
# text "{EXTRALIB}" would replace the accumulated value.
set(EXTRALIB "${EXTRALIB} -lgfortran")
Expand Down
2 changes: 1 addition & 1 deletion common.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ extern "C" {

#define ALLOCA_ALIGN 63UL

#define NUM_BUFFERS (MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER)
#define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))

#ifdef NEEDBUNDERSCORE
#define BLASFUNC(FUNC) FUNC##_
Expand Down
4 changes: 2 additions & 2 deletions cpuid_arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ int detect(void)
fclose(infile);
if(cpu_part != NULL && cpu_implementer != NULL) {
if (strstr(cpu_implementer, "0x41") &&
(strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08") || strstr(cpu_part,"0xd03") ))
return CPU_CORTEXA57; //or compatible A53, A72
(strstr(cpu_part, "0xd07") || strstr(cpu_part,"0xd08")))
return CPU_CORTEXA57; //or compatible, ex. A72
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
return CPU_VULCAN;
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
Expand Down
2 changes: 2 additions & 0 deletions cpuid_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -2009,6 +2009,8 @@ int get_coretype(void){
switch (model) {
case 1:
// AMD Ryzen
case 8:
// Ryzen 2
if(support_avx())
#ifndef NO_AVX2
return CORE_ZEN;
Expand Down
22 changes: 22 additions & 0 deletions driver/level3/level3_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
#define SWITCH_RATIO 2
#endif

// Multiple to which round_up() pads the per-thread partition widths of
// M and N in gemm_driver(). This fallback of 1 is used only when no
// earlier definition exists; with a multiple of 1 round_up() returns
// the width unchanged.
#ifndef GEMM_PREFERED_SIZE
#define GEMM_PREFERED_SIZE 1
#endif

//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
Expand Down Expand Up @@ -510,6 +514,16 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
return 0;
}

/*
 * Pad a partition width up to the next whole multiple of `multiple`.
 *
 *   remainder - extent still left to distribute (callers pass m or n)
 *   width     - width proposed for the current partition
 *   multiple  - granularity to round up to
 *
 * The width is returned untouched when less than one full `multiple`
 * remains, or when the proposed width does not exceed `multiple`.
 * Otherwise the smallest multiple of `multiple` that is >= width is
 * returned. The result may exceed `remainder`; callers clamp it.
 */
static int round_up(int remainder, int width, int multiple)
{
  int units;

  if (remainder < multiple || width <= multiple) {
    return width;
  }

  /* Ceiling division gives the number of `multiple`-sized units needed. */
  units = (width + multiple - 1) / multiple;
  return units * multiple;
}


static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
*range_n, FLOAT *sa, FLOAT *sb,
BLASLONG nthreads_m, BLASLONG nthreads_n) {
Expand Down Expand Up @@ -601,9 +615,14 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
num_parts = 0;
while (m > 0){
width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts);

width = round_up(m, width, GEMM_PREFERED_SIZE);

m -= width;

if (m < 0) width = width + m;
range_M[num_parts + 1] = range_M[num_parts] + width;

num_parts ++;
}
for (i = num_parts; i < MAX_CPU_NUMBER; i++) {
Expand Down Expand Up @@ -645,9 +664,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
if (width < SWITCH_RATIO) {
width = SWITCH_RATIO;
}
width = round_up(n, width, GEMM_PREFERED_SIZE);

n -= width;
if (n < 0) width = width + n;
range_N[num_parts + 1] = range_N[num_parts] + width;

num_parts ++;
}
for (j = num_parts; j < MAX_CPU_NUMBER; j++) {
Expand Down
5 changes: 5 additions & 0 deletions driver/others/blas_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,11 @@ void goto_set_num_threads(int num_threads) {

long i;

#ifdef SMP_SERVER
// Handle lazy re-init of the thread-pool after a POSIX fork
if (unlikely(blas_server_avail == 0)) blas_thread_init();
#endif

if (num_threads < 1) num_threads = blas_num_threads;

#ifndef NO_AFFINITY
Expand Down
7 changes: 6 additions & 1 deletion driver/others/blas_server_win32.c
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,12 @@ int BLASFUNC(blas_thread_shutdown)(void){

void goto_set_num_threads(int num_threads)
{
long i;
long i;

#if defined(SMP_SERVER) && defined(OS_CYGWIN_NT)
// Handle lazy re-init of the thread-pool after a POSIX fork
if (unlikely(blas_server_avail == 0)) blas_thread_init();
#endif

if (num_threads < 1) num_threads = blas_cpu_number;

Expand Down
26 changes: 26 additions & 0 deletions driver/others/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,16 @@ int get_num_procs(void) {
}
#endif

#ifdef OS_AIX
/*
 * Return the processor count reported by
 * sysconf(_SC_NPROCESSORS_CONF). The value is queried on the first
 * call and kept in a function-local static for later calls.
 */
int get_num_procs(void) {
  static int cached_count = 0;

  if (cached_count == 0) {
    cached_count = sysconf(_SC_NPROCESSORS_CONF);
  }
  return cached_count;
}
#endif



#ifdef OS_WINDOWS

int get_num_procs(void) {
Expand Down Expand Up @@ -1738,6 +1748,22 @@ int get_num_procs(void) {
return nums;
}
#endif

#ifdef OS_HAIKU
/*
 * Processor count as reported by sysconf(_SC_NPROCESSORS_CONF).
 * The lookup happens while the cached value is still zero; later
 * calls return the stored value.
 */
int get_num_procs(void) {
  static int nprocs = 0;

  if (nprocs != 0)
    return nprocs;

  nprocs = sysconf(_SC_NPROCESSORS_CONF);
  return nprocs;
}
#endif

#ifdef OS_AIX
/*
 * Return the processor count from sysconf(_SC_NPROCESSORS_CONF),
 * caching it in a static so the query is not repeated once a
 * non-zero value has been stored.
 */
int get_num_procs(void) {
  static int proc_total = 0;

  if (proc_total == 0) {
    proc_total = sysconf(_SC_NPROCESSORS_CONF);
  }
  return proc_total;
}
#endif

#ifdef OS_WINDOWS

Expand Down
10 changes: 9 additions & 1 deletion interface/zswap.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,14 @@
#include "functable.h"
#endif

// Multithreaded swap gives performance benefits on ThunderX2T99, so SMP
// is left alone for THUNDERX2T99, VULCAN and ARMV8 builds. For every
// other target multi-threading is disabled here because it does not
// show any performance benefit; the multi-threading code is kept for
// the record.
#if !defined(THUNDERX2T99) && !defined(VULCAN) && !defined(ARMV8)
#undef SMP
#endif

#ifndef CBLAS

void NAME(blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
Expand Down Expand Up @@ -81,7 +89,7 @@ FLOAT *y = (FLOAT*)vy;
#ifdef SMP
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
if (incx == 0 || incy == 0 || n < 1048576 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT))
nthreads = 1;
else
nthreads = num_cpu_avail(1);
Expand Down
6 changes: 4 additions & 2 deletions kernel/x86_64/dgemm_beta_skylakex.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
return 0;
}

if (m == 0 || n == 0)
return 0;

c_offset = c;

Expand All @@ -69,15 +71,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,

i = m;

while (i > 32) {
while (i >= 32) {
_mm512_storeu_pd(c_offset1, z_zero);
_mm512_storeu_pd(c_offset1 + 8, z_zero);
_mm512_storeu_pd(c_offset1 + 16, z_zero);
_mm512_storeu_pd(c_offset1 + 24 , z_zero);
c_offset1 += 32;
i -= 32;
}
while (i > 8) {
while (i >= 8) {
_mm512_storeu_pd(c_offset1, z_zero);
c_offset1 += 8;
i -= 8;
Expand Down
6 changes: 4 additions & 2 deletions kernel/x86_64/sgemm_beta_skylakex.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
return 0;
}

/* Empty matrix: nothing to scale. The function returns int, so hand
   back 0 like the other early exit instead of a bare `return;`. */
if (n == 0 || m == 0)
  return 0;

c_offset = c;

Expand All @@ -71,13 +73,13 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,

i = m;

while (i > 32) {
while (i >= 32) {
_mm512_storeu_ps(c_offset1, z_zero);
_mm512_storeu_ps(c_offset1 + 16, z_zero);
c_offset1 += 32;
i -= 32;
}
while (i > 8) {
while (i >= 8) {
_mm256_storeu_ps(c_offset1, y_zero);
c_offset1 += 8;
i -= 8;
Expand Down
7 changes: 7 additions & 0 deletions lapack-netlib/LAPACKE/include/lapacke_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@
#ifndef _LAPACKE_CONFIG_H_
#define _LAPACKE_CONFIG_H_

// Android prior to API 21 (no <complex> include): select
// LAPACK_COMPLEX_STRUCTURE.
#if defined(__ANDROID__) && (__ANDROID_API__ < 21)
#define LAPACK_COMPLEX_STRUCTURE
#endif

#ifdef __cplusplus
#if defined(LAPACK_COMPLEX_CPP)
#include <complex>
Expand Down
1 change: 1 addition & 0 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -1627,6 +1627,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYMV_P 8

#define SWITCH_RATIO 32
#define GEMM_PREFERED_SIZE 32

#ifdef ARCH_X86

Expand Down

0 comments on commit 4f43668

Please sign in to comment.