Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dynamic dispatch to AIX and clang for Power #4280

Merged
Merged
Changes from 26 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a92dc25
Fix Makefile.power for xlf
ChipKerchner Sep 29, 2023
a69367c
Fix Makefile.system for OpenXL.
ChipKerchner Sep 29, 2023
af3e172
Merge branch 'develop' into XLC-AIX
ChipKerchner Oct 2, 2023
3b1150f
Fix CPU identification to work on AIX.
ChipKerchner Oct 2, 2023
48da98b
Merge remote-tracking branch 'origin/develop' into XLC-AIX
ChipKerchner Oct 2, 2023
eb738d9
Minor changes.
ChipKerchner Oct 2, 2023
12130ee
Remove tab.
ChipKerchner Oct 2, 2023
a922a07
Cleanup white spaces.
ChipKerchner Oct 2, 2023
1021074
Revert PGI changes.
ChipKerchner Oct 2, 2023
e5dc376
Remove duplicate defines.
ChipKerchner Oct 2, 2023
b677d0d
Adding missing endif
ChipKerchner Oct 2, 2023
4dc435b
Merge branch 'XLC-AIX' of github.ibm.com:PowerAppLibs/openblas into X…
ChipKerchner Oct 2, 2023
a8c90eb
Added cpu_is
ChipKerchner Oct 3, 2023
2d0b233
Fix missing parens.
ChipKerchner Oct 3, 2023
1c4ec47
Merge branch 'XLC-AIX' of github.ibm.com:PowerAppLibs/openblas into X…
ChipKerchner Oct 3, 2023
09212f8
Fix default case for cpu_is.
ChipKerchner Oct 3, 2023
7f7483f
Merge branch 'XLC-AIX' of github.ibm.com:PowerAppLibs/openblas into X…
ChipKerchner Oct 3, 2023
3cc72a3
Only include cpu_id and cpu_supports in AIX and fix parameter types.
ChipKerchner Oct 4, 2023
c60f9d9
Add missing CPU_POWER5.
ChipKerchner Oct 6, 2023
71c6689
Fix dynamic dispatch to work for clang.
ChipKerchner Oct 6, 2023
298bf1f
Reduce differences.
ChipKerchner Oct 6, 2023
36e08f6
One more small change.
ChipKerchner Oct 6, 2023
3655632
Another small change.
ChipKerchner Oct 6, 2023
880af05
Fix dynamic dispatch P9 for clang.
ChipKerchner Oct 6, 2023
badfb2e
Merge branch 'develop' into XLC-AIX
ChipKerchner Oct 26, 2023
d8e6e2b
Merge branch 'develop' into dynamicDispatchAIXandClang
ChipKerchner Nov 1, 2023
c8882bd
Remove POWER7 from cpu list.
ChipKerchner Nov 1, 2023
7dcb2d6
Have POWER7 return arch=POWER6.
ChipKerchner Nov 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 75 additions & 27 deletions driver/others/dynamic_power.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

extern gotoblas_t gotoblas_POWER6;
extern gotoblas_t gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
extern gotoblas_t gotoblas_POWER9;
#endif
#ifdef HAVE_P10_SUPPORT
Expand All @@ -20,14 +20,14 @@ static char *corename[] = {
"POWER10"
};

#define NUM_CORETYPES 4
#define NUM_CORETYPES 5

char *gotoblas_corename(void) {
#ifndef C_PGI
if (gotoblas == &gotoblas_POWER6) return corename[1];
#endif
if (gotoblas == &gotoblas_POWER8) return corename[2];
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
if (gotoblas == &gotoblas_POWER9) return corename[3];
#endif
#ifdef HAVE_P10_SUPPORT
Expand All @@ -36,13 +36,38 @@ char *gotoblas_corename(void) {
return corename[0];
}

#if defined(__clang__)
static int __builtin_cpu_supports(char* arg)
#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER7 7
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

POWER7 currently does not exist as an actual, unique target; it is an alias for POWER6. Do you plan to change that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10

#ifdef _AIX
#include <sys/systemcfg.h>

static int cpuid(void)
{
return 0;
}
int arch = _system_configuration.implementation;
#ifdef POWER_6
if (arch == POWER_6) return CPU_POWER6;
#endif

#ifdef POWER_7
else if (arch == POWER_7) return CPU_POWER7;
#endif
#ifdef POWER_8
else if (arch == POWER_8) return CPU_POWER8;
#endif
#ifdef POWER_9
else if (arch == POWER_9) return CPU_POWER9;
#endif
#ifdef POWER_10
else if (arch == POWER_10) return CPU_POWER10;
#endif
return CPU_UNKNOWN;
}
#else
#if defined(C_PGI) || defined(__clang__)
/*
* NV HPC compilers do not yet implement __builtin_cpu_is().
Expand All @@ -53,21 +78,12 @@ static int __builtin_cpu_supports(char* arg)
* what was requested.
*/

#include <string.h>

/*
* Define POWER processor version table.
*
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
*/

#define CPU_UNKNOWN 0
#define CPU_POWER5 5
#define CPU_POWER6 6
#define CPU_POWER8 8
#define CPU_POWER9 9
#define CPU_POWER10 10

static struct {
uint32_t pvr_mask;
uint32_t pvr_value;
Expand Down Expand Up @@ -160,7 +176,8 @@ static struct {
},
};

static int __builtin_cpu_is(const char *cpu) {
static int cpuid(void)
{
int i;
uint32_t pvr;
uint32_t cpu_type;
Expand All @@ -178,15 +195,42 @@ static int __builtin_cpu_is(const char *cpu) {
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
#endif
cpu_type = pvrPOWER[i].cpu_type;
return (int)(cpu_type);
}
#endif /* C_PGI */
#endif /* _AIX */

#ifndef __BUILTIN_CPU_SUPPORTS__
#include <string.h>

if (!strcmp(cpu, "power8"))
return cpu_type == CPU_POWER8;
if (!strcmp(cpu, "power9"))
return cpu_type == CPU_POWER9;
return 0;
static int __builtin_cpu_is(const char *arg)
{
static int ipinfo = -1;
if (ipinfo < 0) {
ipinfo = cpuid();
}
#ifdef HAVE_P10_SUPPORT
if (ipinfo == CPU_POWER10) {
if (!strcmp(arg, "power10")) return 1;
}
#endif
if (ipinfo == CPU_POWER9) {
if (!strcmp(arg, "power9")) return 1;
} else if (ipinfo == CPU_POWER8) {
if (!strcmp(arg, "power8")) return 1;
#ifndef C_PGI
} else if (ipinfo == CPU_POWER6) {
if (!strcmp(arg, "power6")) return 1;
#endif
}
return 0;
}

#endif /* C_PGI */
/*
 * Stand-in for the compiler builtin of the same name, compiled only when
 * __BUILTIN_CPU_SUPPORTS__ is not defined.
 *
 * No feature detection is performed: `arg` is ignored and every query is
 * answered with 0 ("not supported").
 */
static int __builtin_cpu_supports(const char *arg)
{
return 0;
}
#endif

static gotoblas_t *get_coretype(void) {

Expand All @@ -196,19 +240,23 @@ static gotoblas_t *get_coretype(void) {
#endif
if (__builtin_cpu_is("power8"))
return &gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
if (__builtin_cpu_is("power9"))
return &gotoblas_POWER9;
#endif
#ifdef HAVE_P10_SUPPORT
#if defined(_AIX) || defined(__clang__)
if (__builtin_cpu_is("power10"))
#else
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
#endif
return &gotoblas_POWER10;
#endif
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
if (__builtin_cpu_is("power10"))
return &gotoblas_POWER9;
#endif
#endif
return NULL;
}

Expand All @@ -233,7 +281,7 @@ static gotoblas_t *force_coretype(char * coretype) {
case 1: return (&gotoblas_POWER6);
#endif
case 2: return (&gotoblas_POWER8);
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
case 3: return (&gotoblas_POWER9);
#endif
#ifdef HAVE_P10_SUPPORT
Expand Down
Loading