Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates GPU version (for HIP-CPU tests); adds GPU version test (w/ HIP-CPU) to github actions #855

Merged
merged 14 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 40 additions & 7 deletions .github/scripts/run_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,44 @@ else
hdf=()
fi

## HIP
# Collects the HIP related arguments in the array 'hip'.
# The array stays empty unless HIP is set to "true"; the settings below
# select HIP_PLATFORM=cpu with g++ as HIPCC.
hip=()
if [[ "${HIP}" == "true" ]]; then
  printf '\n%s\n\n' "enabling HIP"
  hip=(
    --with-hip
    HIPCC=g++
    HIP_FLAGS="-O2 -g -std=c++17"
    HIP_PLATFORM=cpu
    HIP_INC=./external_libs/ROCm-HIP-CPU/include
    HIP_LIBS="-ltbb -lpthread -lstdc++ -lmpi_cxx"
  )
fi

## special testflags
# Work-around: handing TESTFLAGS: FLAGS_CHECK=".." over through the 'env:' parameter
# of the workflow 'CI.yml' does not work, since the FLAGS_CHECK string then gets split
# up and ./configure .. complains about unknown parameters.
# Instead, the keyword "check-mcmodel-medium" is translated here into a single array
# element FLAGS_CHECK="..", which avoids the splitting.
flags=()
if [[ "${TESTFLAGS}" == "check-mcmodel-medium" ]]; then
  # use FLAGS_CHECK
  flags=(FLAGS_CHECK="-O3 -mcmodel=medium -std=f2008 -Wall -Wno-do-subscript -Wno-conversion -Wno-maybe-uninitialized")
  # reset
  TESTFLAGS=""
fi

# configuration
echo
echo "configuration:"
echo

# split TESTFLAGS into individual items
set -- ${TESTFLAGS}

./configure \
${adios[@]} \
${netcdf[@]} \
${hdf[@]} \
${petsc[@]} \
FC=gfortran MPIFC=mpif90 CC=gcc "${TESTFLAGS}"
"${adios[@]}" \
"${netcdf[@]}" \
"${hdf[@]}" \
"${hip[@]}" \
"${petsc[@]}" \
"${flags[@]}" \
FC=gfortran MPIFC=mpif90 CC=gcc "$@"

# checks
if [[ $? -ne 0 ]]; then echo "configuration failed:"; cat config.log; echo ""; echo "exiting..."; exit 1; fi
Expand All @@ -95,8 +122,14 @@ sed -i "s:IMAIN .*:IMAIN = ISTANDARD_OUTPUT:" setup/constants.h

# compilation
echo
echo "clean compilation:"
make clean; #make -j2 all
echo "clean:"
echo
make clean

echo
echo "compilation:"
echo
make -j4 all

# checks
if [[ $? -ne 0 ]]; then exit 1; fi
Expand Down
26 changes: 26 additions & 0 deletions .github/scripts/run_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ if [ "${PETSC}" == "true" ]; then
echo; echo "done PETSc"; echo
fi

## HIP
# Installs the additional package libtbb-dev when HIP is set to "true".
if [[ "${HIP}" == "true" ]]; then
  printf '\n%s\n\n' "HIP additionals installation:"
  # stops right away if the package installation fails
  sudo apt-get install -yq --no-install-recommends libtbb-dev || exit 1
fi
echo

# python3 pip upgrade might complain: "ERROR: launchpadlib 1.10.13 requires testresources"
sudo apt-get install -yq --no-install-recommends python3-testresources
# checks exit code
Expand Down Expand Up @@ -140,6 +152,20 @@ if [ "${ADIOS2}" == "true" ]; then
echo; echo "done ADIOS2"; echo
fi

## EMC model
# Downloads the IRIS EMC Alaska model file into DATA/IRIS_EMC/ and links it as
# model.nc, when EMC_MODEL is set to "true". Exits with status 1 on any failure.
if [[ "${EMC_MODEL}" == "true" ]]; then
  echo
  echo "EMC model installation:"
  echo
  echo "current dir: $(pwd)"
  emc_file="Alaska.JointInversion-RF+Vph+HV-1.Berg.2020-nc4.nc"
  # without this check, a missing directory would let wget store the file in the
  # wrong place and the 'cd ../../' below would leave the source tree
  cd DATA/IRIS_EMC/ || { echo "could not change to directory DATA/IRIS_EMC/"; exit 1; }
  wget --quiet --tries=3 "https://ds.iris.edu/files/products/emc/emc-files/${emc_file}"
  # checks exit code
  if [[ $? -ne 0 ]]; then exit 1; fi
  # -f: replaces a link left over from an earlier installation instead of failing
  ln -sf "${emc_file}" model.nc || exit 1
  cd ../../ || exit 1
fi

# MPI
# github actions uses for Linux virtual machines a 2-core CPU environment
# see: https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
Expand Down
127 changes: 124 additions & 3 deletions .github/scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,85 @@ echo

# bash function for checking seismogram output with reference solutions
#
# Runs compare_seismogram_correlations.py on REF_SEIS/ and OUTPUT_FILES/ of the
# current directory and requires the correlation value taken from the "min/max"
# line of its output to be at least 0.999.
# Globals:  WORKDIR (read) - directory that contains utils/scripts/
# Outputs:  the comparison output followed by the verdict, on stdout
# Returns:  0 when the check passes; exits the shell with status 1 otherwise
my_test(){
  local cmp_script=compare_seismogram_correlations.py
  local cmp_out
  local cmp_status

  echo "testing seismograms:"
  echo "######################################################################################################################"
  echo "testing seismograms"

  # links the comparison script into the example directory (kept if already there)
  if [[ ! -e "${cmp_script}" ]]; then
    ln -sf "$WORKDIR/utils/scripts/${cmp_script}" "${cmp_script}"
  fi

  # runs the comparison once; the output is shown first and parsed afterwards
  cmp_out=$(./"${cmp_script}" REF_SEIS/ OUTPUT_FILES/)
  cmp_status=$?
  printf '%s\n' "${cmp_out}"
  if [[ ${cmp_status} -ne 0 ]]; then exit 1; fi

  # takes the 3rd '|'-separated column of the min/max line as correlation value.
  # a missing min/max line counts as failure, otherwise nothing would be checked
  # and the test would pass silently.
  printf '%s\n' "${cmp_out}" \
    | grep min/max \
    | cut -d '|' -f 3 \
    | awk '
        BEGIN { seen = 0; bad = 0 }
        {
          seen = 1
          print "correlation:", $1
          if ($1 < 0.999) { print $1, "failed"; bad = 1 } else { print $1, "good" }
          exit
        }
        END {
          if (!seen) { print "no min/max correlation value found, failed"; exit 1 }
          exit bad
        }'
  if [[ $? -ne 0 ]]; then exit 1; fi
  echo "######################################################################################################################"
}

# prints the value(s) following '=' on the "maximum value of <name> kernel" line(s)
# Arguments: $1 - kernel name (rho, kappa, mu, alphav, betav), $2 - file to search
_kernel_max_value(){
  local name=$1
  local file=$2
  grep -E "maximum value of ${name}[[:space:]]+kernel" "${file}" | cut -d = -f 2 | tr -d ' '
}

# compares a single kernel value of the test output with its expected value,
# using a relative tolerance of 0.001 (1 promille) with respect to the expected value
# Arguments: $1 - kernel name, $2 - label to print, $3 - expected value, $4 - test output file
# Globals:   VAL (written) - value found in the test output
# Returns:   0 if the value is within tolerance, non-zero otherwise
_check_kernel_value(){
  local name=$1
  local label=$2
  local expected=$3
  local file=$4

  VAL=$(_kernel_max_value "${name}" "${file}")
  echo "${label} $VAL"
  # more than one matching line leaves it open which value to compare
  if [[ "${expected}" == *$'\n'* || "$VAL" == *$'\n'* ]]; then
    echo " found more than one value"
    echo " failed"
    return 1
  fi
  # the difference is taken relative to the magnitude of the expected value; dividing
  # by the signed value would turn the ratio negative for negative references and
  # let any output pass. a zero reference only accepts a zero output value.
  awk -v ex="${expected}" -v val="$VAL" 'BEGIN {
    ex_num = ex + 0
    val_num = val + 0
    diff = ex_num - val_num
    diff_abs = (diff >= 0) ? diff : -diff
    ex_abs = (ex_num >= 0) ? ex_num : -ex_num
    if (ex_abs > 0) {
      diff_rel = diff_abs / ex_abs
    } else {
      diff_rel = (diff_abs > 0) ? 1 : 0
    }
    print " value: expected = " ex " gotten = " val " - difference absolute = " diff_abs " relative = " diff_rel
    if (diff_rel > 0.001) { print " failed"; exit 1 } else { print " good"; exit 0 }
  }'
}

# kernel value test - checks rho/kappa/mu (and alphav/betav) kernel value outputs
#
# Reads the expected "maximum value of .. kernel" values from REF_KERNEL/output_solver.txt
# and compares them with the ones found in output.log of the current directory.
# Kernels without a reference value are skipped; a rho reference value is mandatory.
# Globals:  file_ref, file_out, RHO, KAPPA, MU, ALPHAV, BETAV, VAL, PASSED (written)
# Outputs:  reference values, found values and verdicts on stdout
# Returns:  0 when all compared values are good; exits the shell with status 1 otherwise
my_kernel_test(){
  echo "######################################################################################################################"
  echo "testing kernel values"
  file_ref=REF_KERNEL/output_solver.txt
  file_out=output.log # captures the OUTPUT_FILES/output_solver.txt when running solver since IMAIN was set to standard out
  if [ ! -e "$file_ref" ]; then echo "Please check if file $file_ref exists..."; ls -alR ./; exit 1; fi
  if [ ! -e "$file_out" ]; then echo "Please check if file $file_out exists..."; ls -alR ./; exit 1; fi

  # gets reference expected kernel values from REF_KERNEL/ folder
  RHO=$(_kernel_max_value rho "$file_ref")
  KAPPA=$(_kernel_max_value kappa "$file_ref")
  MU=$(_kernel_max_value mu "$file_ref")
  ALPHAV=$(_kernel_max_value alphav "$file_ref")
  BETAV=$(_kernel_max_value betav "$file_ref")

  # needs at least the rho reference value (only rho is checked here)
  if [ "$RHO" == "" ]; then
    echo " missing reference kernel values: RHO=$RHO | KAPPA=$KAPPA MU=$MU | ALPHAV=$ALPHAV BETAV=$BETAV"
    echo
    exit 1
  else
    echo " reference kernel values: RHO=$RHO | KAPPA=$KAPPA MU=$MU | ALPHAV=$ALPHAV BETAV=$BETAV"
  fi

  # compares with test output; final test result is kept in PASSED (0 == all good)
  PASSED=0
  # checks rho kernel value
  if [ "$RHO" != "" ]; then
    _check_kernel_value rho "kernel rho :" "$RHO" "$file_out" || PASSED=1
  fi
  # checks kappa kernel value
  if [ "$KAPPA" != "" ]; then
    _check_kernel_value kappa "kernel kappa :" "$KAPPA" "$file_out" || PASSED=1
  fi
  # checks mu kernel value
  if [ "$MU" != "" ]; then
    _check_kernel_value mu "kernel mu :" "$MU" "$file_out" || PASSED=1
  fi
  # checks alphav kernel value (if anisotropic kernels)
  if [ "$ALPHAV" != "" ]; then
    _check_kernel_value alphav "kernel alphav:" "$ALPHAV" "$file_out" || PASSED=1
  fi
  # checks betav kernel value (if anisotropic kernels)
  if [ "$BETAV" != "" ]; then
    _check_kernel_value betav "kernel betav :" "$BETAV" "$file_out" || PASSED=1
  fi

  # overall pass
  if [[ $PASSED -ne 0 ]]; then
    echo "testing kernel values: failed"; exit 1;
  else
    echo "testing kernel values: all good"
  fi
  echo "######################################################################################################################"
}
# test example
cd $dir

Expand Down Expand Up @@ -68,7 +139,7 @@ fi
# hdf5 i/o example
if [ "${HDF5}" == "true" ]; then
echo
echo "HDF5 enabled test run"
echo "test run w/ HDF5"
echo
sed -i "s:^HDF5_ENABLED .*:HDF5_ENABLED = .true.:" DATA/Par_file
#sed -i "s:^HDF5_FOR_MOVIES .*:HDF5_FOR_MOVIES = .true.:" DATA/Par_file
Expand All @@ -80,13 +151,24 @@ fi
# adios
if [ "${ADIOS2}" == "true" ]; then
# turns on ADIOS
echo "turning on ADIOS"
sed -i "s:^ADIOS_ENABLED .*:ADIOS_ENABLED = .true.:" DATA/Par_file
fi

## GPU
# Sets GPU_MODE to .true. in DATA/Par_file when GPU is set to "true".
case "${GPU}" in
  true)
    # turns on GPU
    echo "turning on GPU"
    sed -i "s:^GPU_MODE .*:GPU_MODE = .true.:" DATA/Par_file
    ;;
esac

# save Par_file state
cp -v DATA/Par_file DATA/Par_file.bak

# use kernel script
if [ "${RUN_KERNEL}" == "true" ]; then
# use kernel script
./run_this_example.kernel.sh
./run_this_example_kernel.sh | tee output.log
else
# default script
./run_this_example.sh
Expand All @@ -107,9 +189,48 @@ if [ "${DEBUG}" == "true" ] || [ "${FULL_GRAVITY}" == "true" ] || [ "${RUN_KERNE
else
my_test
fi
# checks exit code
if [[ $? -ne 0 ]]; then exit 1; fi

# kernel test
# Checks the kernel values of the preceding kernel run and, if the Par_file has
# UNDO_ATTENUATION turned off, repeats the kernel run and check with it turned on.
if [[ "${RUN_KERNEL}" == "true" ]]; then
  # check kernel values
  my_kernel_test
  # checks exit code
  if [[ $? -ne 0 ]]; then exit 1; fi
  # clean up
  rm -rf OUTPUT_FILES/ SEM/ output.log

  # re-run kernel test w/ UNDO_ATT
  UNDO_ATT=$(grep ^UNDO_ATTENUATION DATA/Par_file | cut -d = -f 2 | tr -d ' ')
  if [[ ${UNDO_ATT} == *"false"* ]]; then
    echo
    echo "*****************************************"
    echo "run kernel w/ UNDO_ATTENUATION"
    echo "*****************************************"
    echo

    # turns on UNDO_ATTENUATION
    echo "turning on UNDO_ATTENUATION"
    sed -i "s:^UNDO_ATTENUATION .*:UNDO_ATTENUATION = .true.:" DATA/Par_file

    # use kernel script
    ./run_this_example_kernel.sh | tee output.log
    # checks exit code of the example script itself: after a pipeline, $? only holds
    # the status of the last command (tee), which would hide a failed run
    if [[ ${PIPESTATUS[0]} -ne 0 ]]; then exit 1; fi
    # kernel test
    my_kernel_test
    # checks exit code
    if [[ $? -ne 0 ]]; then exit 1; fi
  fi
fi

# restore original Par_file
cp -v DATA/Par_file.bak DATA/Par_file

# cleanup
rm -rf OUTPUT_FILES* DATABASES_MPI*
if [ -e SEM ]; then rm -rf SEM/; fi

echo
echo "all good"
Expand Down
Loading