Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cat: updates in vector-FLOPs benchmarks #279

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 29 additions & 29 deletions src/counter_analysis_toolkit/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ endif
ifeq ($(ARCH),POWER)
FLOP+=-maltivec -DPOWER
VECSRC=vec_fma_hp.o vec_fma_sp.o vec_fma_dp.o vec_nonfma_hp.o vec_nonfma_sp.o vec_nonfma_dp.o
VEC=-maltivec -O0 -DPOWER
VEC_FMA=-maltivec -O0 -DPOWER
VEC_ALL=$(VEC) -O0 -DPOWER
VEC=-maltivec -DPOWER
VEC_FMA=-maltivec -DPOWER
VEC_ALL=$(VEC) -DPOWER
endif
ifeq ($(ARCH),ARM)
FLOP+=-march=armv8.2-a+fp16 -DARM
Expand Down Expand Up @@ -109,58 +109,58 @@ weak_symbols.o: weak_symbols.c vec.h
-$(CC) -c $(CFLAGS) weak_symbols.c

vec.o: vec.c vec.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) -D$(ARCH) $(VEC_META) vec.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) -D$(ARCH) $(VEC_META) vec.c

vec_scalar_verify.o: vec_scalar_verify.c vec_scalar_verify.h cat_arch.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_ALL) vec_scalar_verify.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_ALL) vec_scalar_verify.c

vec_fma_hp.o: vec_fma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_hp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_hp.c

vec_fma_hp: vec_fma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_hp.c -o vec_fma_hp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_hp.c -o vec_fma_hp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_hp.c -o vec_fma_hp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_hp.c -o vec_fma_hp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_hp.c -o vec_fma_hp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_hp.c -o vec_fma_hp-512B.o

vec_fma_sp.o: vec_fma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_sp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_sp.c

vec_fma_sp: vec_fma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_sp.c -o vec_fma_sp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_sp.c -o vec_fma_sp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_sp.c -o vec_fma_sp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_sp.c -o vec_fma_sp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_sp.c -o vec_fma_sp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_sp.c -o vec_fma_sp-512B.o

vec_fma_dp.o: vec_fma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_dp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_dp.c

vec_fma_dp: vec_fma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_dp.c -o vec_fma_dp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_dp.c -o vec_fma_dp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_dp.c -o vec_fma_dp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_dp.c -o vec_fma_dp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_dp.c -o vec_fma_dp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_dp.c -o vec_fma_dp-512B.o

vec_nonfma_hp.o: vec_nonfma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_hp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_hp.c

vec_nonfma_hp: vec_nonfma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_hp.c -o vec_nonfma_hp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_hp.c -o vec_nonfma_hp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_hp.c -o vec_nonfma_hp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_hp.c -o vec_nonfma_hp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_hp.c -o vec_nonfma_hp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_hp.c -o vec_nonfma_hp-512B.o

vec_nonfma_sp.o: vec_nonfma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_sp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_sp.c

vec_nonfma_sp: vec_nonfma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_sp.c -o vec_nonfma_sp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_sp.c -o vec_nonfma_sp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_sp.c -o vec_nonfma_sp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_sp.c -o vec_nonfma_sp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_sp.c -o vec_nonfma_sp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_sp.c -o vec_nonfma_sp-512B.o

vec_nonfma_dp.o: vec_nonfma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_dp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_dp.c

vec_nonfma_dp: vec_nonfma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_dp.c -o vec_nonfma_dp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_dp.c -o vec_nonfma_dp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o

cat_collect:
$(CC) $(CFLAGS) -fopenmp $(INCFLAGS) main.c $(wildcard *.o) -o cat_collect $(LDFLAGS)
Expand Down
45 changes: 5 additions & 40 deletions src/counter_analysis_toolkit/cat_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,38 +123,17 @@ typedef float64x2_t DP_VEC_TYPE;
#define ADD_VEC_SH(_I_,_J_) vaddh_f16( _I_ , _J_ );
#define MUL_VEC_SH(_I_,_J_) vmulh_f16( _I_ , _J_ );
#define SQRT_VEC_SH(_I_) vsqrth_f16( _I_ );
#define FMA_VEC_SH(_out_,_I_,_J_,_K_) {\
HP_VEC_TYPE arg1 = SET_VEC_PH(_I_);\
HP_VEC_TYPE arg2 = SET_VEC_PH(_J_);\
HP_VEC_TYPE arg3 = SET_VEC_PH(_K_);\
HP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PH( arg1 , arg2 , arg3 );\
_out_ = ((half*)&(argTmp))[0];\
}
#define FMA_VEC_SH(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#define SET_VEC_SS(_I_) _I_ ;
#define ADD_VEC_SS(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SS(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
SP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
_out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#define SET_VEC_SD(_I_) _I_ ;
#define ADD_VEC_SD(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SD(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
DP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
_out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#elif defined(POWER)
void test_hp_power_VEC( int instr_per_loop, uint64 iterations, int EventSet, FILE *fp );
Expand Down Expand Up @@ -187,25 +166,11 @@ typedef __vector double DP_VEC_TYPE;
#define SET_VEC_SS(_I_) _I_ ;
#define ADD_VEC_SS(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SS(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
SP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
_out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#define SET_VEC_SD(_I_) _I_ ;
#define ADD_VEC_SD(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SD(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
DP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
_out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#endif
Loading
Loading