Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates GPU source kernels; adds GPU kernel testing in github actions #1759

Merged
merged 21 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
60dfad0
fixes typo
danielpeter Nov 8, 2024
40515c8
fixes deallocation for gpu simulation
danielpeter Nov 10, 2024
fadca31
adding comments and renaming variables for gpu kernels
danielpeter Nov 10, 2024
4144b0a
updates Kernel_2_acoustic (declares realw_const_p for rhostore array;…
danielpeter Nov 11, 2024
b013ef8
updates compute_stream usage for acoustic GPU kernels
danielpeter Nov 11, 2024
53d3339
updates stream usage for GPU kernels; renames variable (kappa inverse)
danielpeter Nov 11, 2024
156b14f
fixes hip kernel for elastic sources
danielpeter Nov 12, 2024
a0ae26d
updates gpu source kernels
danielpeter Nov 12, 2024
b4d8af3
updates adjoint source reading (ASDF & SU format)
danielpeter Nov 13, 2024
16db5a1
updates adding source contributions
danielpeter Nov 13, 2024
fd5e287
adding submodule HIP-CPU in external_libs/ for testing
danielpeter Nov 13, 2024
dd9e694
update HIP configuration; adds HIP GPU kernel test
danielpeter Nov 13, 2024
5c93a5d
updates github actions
danielpeter Nov 14, 2024
b575ce3
adds reference kernel solutions (homogeneous_acoustic and homogeneous…
danielpeter Nov 14, 2024
5c17b8e
updates github actions (for kernel runs)
danielpeter Nov 14, 2024
380880e
updates github action test
danielpeter Nov 14, 2024
3ea9bfa
updates github actions kernel testing
danielpeter Nov 14, 2024
eaf283e
updates testing
danielpeter Nov 14, 2024
5868a7e
updates testing w/ HDF5
danielpeter Nov 14, 2024
a4cd1c7
updates test script
danielpeter Nov 14, 2024
848ee8d
updates kernel testing
danielpeter Nov 14, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions .github/scripts/run_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,52 @@ else
adios=()
fi

## HDF5
# configure options for HDF5 support; stays empty unless HDF5 is set to "true"
hdf=()
if [[ "${HDF5}" == "true" ]]; then
  printf '\n%s\n\n' "enabling HDF5"
  # include and library locations passed to ./configure
  hdf+=(--with-hdf5)
  hdf+=(HDF5_INC="/usr/include/hdf5/openmpi/")
  hdf+=(HDF5_LIBS="-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi")
fi

## HIP
# configure options for a HIP build (HIP_PLATFORM=cpu, compiled with g++);
# stays empty unless HIP is set to "true"
hip=()
if [[ "${HIP}" == "true" ]]; then
  printf '\n%s\n\n' "enabling HIP"
  hip+=(--with-hip)
  hip+=(HIPCC=g++)
  hip+=(HIP_FLAGS="-O2 -g -std=c++17")
  hip+=(HIP_PLATFORM=cpu)
  hip+=(HIP_INC=./external_libs/ROCm-HIP-CPU/include)
  hip+=(HIP_LIBS="-ltbb -lpthread -lstdc++")
fi

## special testflags
# extra configure arguments derived from TESTFLAGS; empty by default
flags=()
case "${TESTFLAGS}" in
  check-mcmodel-medium)
    # note: work-around - passing TESTFLAGS: FLAGS_CHECK=".." through the 'env:' parameter of the
    #       workflow 'CI.yml' does not work, since the FLAGS_CHECK string then gets split up and
    #       ./configure complains about unknown parameters.
    #       the whole FLAGS_CHECK=".." assignment is therefore stored as a single array element here.
    flags=(FLAGS_CHECK="-O3 -mcmodel=medium -std=f2008 -Wall -Wno-do-subscript -Wno-conversion -Wno-maybe-uninitialized")
    # reset, the keyword itself must not be handed over to ./configure
    TESTFLAGS=""
    ;;
esac

# configuration
echo
echo "configuration:"
echo

# split TESTFLAGS into individual items
set -- ${TESTFLAGS}

./configure \
${adios[@]} \
FC=gfortran MPIFC=mpif90 CC=gcc ${TESTFLAGS}
"${adios[@]}" \
"${hdf[@]}" \
"${hip[@]}" \
"${flags[@]}" \
FC=gfortran MPIFC=mpif90 CC=gcc "$@"

# checks
if [[ $? -ne 0 ]]; then echo "configuration failed:"; cat config.log; echo ""; echo "exiting..."; exit 1; fi
Expand All @@ -62,8 +100,14 @@ sed -i "s:IMAIN .*:IMAIN = ISTANDARD_OUTPUT:" setup/constants.h

# compilation
echo
echo "compilation:"
make clean; make -j2 all
echo "clean"
echo
make clean

echo
echo "compilation"
echo
make -j4 all

# checks
if [[ $? -ne 0 ]]; then exit 1; fi
Expand Down
14 changes: 11 additions & 3 deletions .github/scripts/run_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ if [[ $? -ne 0 ]]; then exit 1; fi
# fortran/openMPI compiler
sudo apt-get install -yq --no-install-recommends gfortran g++ openmpi-bin libopenmpi-dev

# parallel hdf5
if [[ "${TEST}" == *"with-hdf5"* ]]; then
## parallel HDF5
if [ "${HDF5}" == "true" ]; then
echo
echo "additional installation: ${TEST}"
echo "HDF5 additional installation:"
echo
sudo apt-get install -yq --no-install-recommends libhdf5-mpi-dev
## checks installation paths
Expand All @@ -34,6 +34,14 @@ if [[ "${TEST}" == *"with-hdf5"* ]]; then
#echo
fi

## HIP
# installs the additional package libtbb-dev when HIP is set to "true".
# note: like the if-statement it replaces, the case-statement returns the status of the
#       apt-get call when it runs, and 0 when HIP is not "true".
case "${HIP}" in
  true)
    echo
    echo "HIP additionals installation:"
    echo
    sudo apt-get install -yq --no-install-recommends libtbb-dev
    ;;
esac

# checks exit code
if [[ $? -ne 0 ]]; then exit 1; fi
echo
Expand Down
132 changes: 121 additions & 11 deletions .github/scripts/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,67 @@ echo

# bash function for checking seismogram output with reference solutions
my_test(){
# Compares the seismograms in OUTPUT_FILES/ with the reference traces in REF_SEIS/
# (both relative to the current directory) and exits the whole script with status 1
# as soon as one of the two comparison passes below fails.
echo "testing seismograms:"
echo "######################################################################################################################"
echo "testing seismograms"
# links the comparison script from $WORKDIR/utils/scripts/ into the current directory
ln -s $WORKDIR/utils/scripts/compare_seismogram_correlations.py
# first pass: runs the comparison script; a non-zero exit status aborts the test
./compare_seismogram_correlations.py REF_SEIS/ OUTPUT_FILES/
if [[ $? -ne 0 ]]; then exit 1; fi
# second pass: picks the 'min/max' line of the script output, takes the third '|'-separated field
# and requires its first value (printed as "correlation:") to be at least 0.999.
# awk exits on the first line it processes; $? checked afterwards is the exit status of awk
# (last command of the pipeline).
./compare_seismogram_correlations.py REF_SEIS/ OUTPUT_FILES/ | grep min/max | cut -d \| -f 3 | awk '{print "correlation:",$1; if ($1 < 0.999 ){print $1,"failed"; exit 1;}else{ print $1,"good"; exit 0;}}'
if [[ $? -ne 0 ]]; then exit 1; fi
echo "######################################################################################################################"
}

# helper: prints the value reported for the maximum of a kernel.
#   $1 - kernel name (rho, kappa or mu)
#   $2 - file to search
# Takes the first line matching "maximum value of <name> kernel", prints the text after '='
# with all blanks removed; prints nothing if no line matches.
_kernel_max_value(){
  grep -E "maximum value of $1[[:space:]]+kernel" "$2" | head -n 1 | cut -d = -f 2 | tr -d ' '
}

# helper: compares one kernel maximum from the test output with its expected value.
#   $1 - kernel name (rho, kappa or mu)
#   $2 - expected (reference) value
#   $3 - file holding the test output
# Uses a relative tolerance of 0.001 (1 promille) with respect to the magnitude of the expected value.
# Returns 0 if the value is within tolerance, 1 otherwise.
_kernel_check_value(){
  local name=$1 expected=$2 file=$3
  local number_re='^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$'
  local val
  val=$(_kernel_max_value "$name" "$file")
  echo "kernel $name : $val"
  # a missing or non-numeric value (e.g. NaN, Infinity) must never count as a match:
  # awk would either treat it as 0 or carry a NaN through comparisons that then all evaluate to false
  if [[ ! "$expected" =~ $number_re ]] || [[ ! "$val" =~ $number_re ]]; then
    echo " value: expected = $expected gotten = $val - not a valid number"
    echo " failed"
    return 1
  fi
  # the difference is normalized by the magnitude of the expected value, so that negative
  # reference values are checked as strictly as positive ones; for an expected value of
  # exactly zero only an exact zero passes (avoids a division by zero)
  awk -v ex="$expected" -v val="$val" 'BEGIN {
    diff = ex - val
    diff_abs = (diff >= 0) ? diff : -diff
    ex_abs = (ex >= 0) ? ex : -ex
    if (ex_abs > 0) { diff_rel = diff_abs / ex_abs } else { diff_rel = (diff_abs > 0) ? 1 : 0 }
    print " value: expected = " ex " gotten = " val " - difference absolute = " diff_abs " relative = " diff_rel
    if (diff_rel > 0.001) { print " failed"; exit 1 } else { print " good"; exit 0 }
  }'
}

my_kernel_test(){
  # kernel value test - checks rho/kappa/mu kernel value outputs
  #
  # reads the expected maxima from REF_KERNEL/output_solver.txt and the computed ones from output.log
  # (both relative to the current directory). rho and kappa are mandatory, mu is only checked
  # when the reference file provides it.
  # exits the script with status 1 if a file or a mandatory reference value is missing, or if any
  # value differs by more than the tolerance; returns normally otherwise.
  # note: file_ref, file_out, RHO, KAPPA, MU and PASSED are left as global variables, as before.
  echo "######################################################################################################################"
  echo "testing kernel values"
  file_ref=REF_KERNEL/output_solver.txt
  file_out=output.log # captures the OUTPUT_FILES/output_solver.txt when running solver since IMAIN was set to standard out
  if [ ! -e "$file_ref" ]; then echo "Please check if file $file_ref exists..."; ls -alR ./; exit 1; fi
  if [ ! -e "$file_out" ]; then echo "Please check if file $file_out exists..."; ls -alR ./; exit 1; fi
  # gets reference expected kernel values from REF_KERNEL/ folder
  RHO=$(_kernel_max_value rho "$file_ref")
  KAPPA=$(_kernel_max_value kappa "$file_ref")
  MU=$(_kernel_max_value mu "$file_ref")
  # need at least rho & kappa (for acoustic kernels)
  if [ "$RHO" == "" ] || [ "$KAPPA" == "" ]; then
    echo " missing reference kernel values: RHO=$RHO KAPPA=$KAPPA MU=$MU"
    echo
    exit 1
  else
    echo " reference kernel values: RHO=$RHO KAPPA=$KAPPA MU=$MU"
  fi
  # compares with test output - using a relative tolerance of 0.001 (1 promille) with respect to expected value
  # final test result (0 == passed); all values are checked before deciding, so every mismatch gets reported
  PASSED=0
  # checks rho kernel value
  _kernel_check_value rho "$RHO" "$file_out" || PASSED=1
  # checks kappa kernel value
  _kernel_check_value kappa "$KAPPA" "$file_out" || PASSED=1
  # checks mu kernel value (if available for elastic kernel)
  if [ "$MU" != "" ]; then
    _kernel_check_value mu "$MU" "$file_out" || PASSED=1
  fi
  # overall pass
  if [[ $PASSED -ne 0 ]]; then
    echo "testing kernel values: failed"; exit 1;
  else
    echo "testing kernel values: all good"
  fi
  echo "######################################################################################################################"
}

# test example
Expand All @@ -37,7 +92,7 @@ cd $dir
# limit time steps for testing
sed -i "s:^NSTEP .*:NSTEP = 200:" DATA/Par_file
# shortens output interval to avoid timeouts
sed -i "s:^NTSTEP_BETWEEN_OUTPUT_INFO .*:NTSTEP_BETWEEN_OUTPUT_INFO = 50:" DATA/Par_file
sed -i "s:^NTSTEP_BETWEEN_OUTPUT_INFO .*:NTSTEP_BETWEEN_OUTPUT_INFO = 100:" DATA/Par_file

# limit time steps for specific examples
# simple mesh example
Expand Down Expand Up @@ -105,27 +160,45 @@ if [ "$TESTDIR" == "EXAMPLES/applications/meshfem3D_examples/sep_bathymetry/" ];
sed -i "s:^NSTEP .*:NSTEP = 1000:" DATA/Par_file
fi

# hdf5 i/o example
if [[ "${TEST}" == *"with-hdf5"* ]]; then
## HDF5 - i/o example
if [ "${HDF5}" == "true" ]; then
echo
echo "test run: ${TEST}"
echo "test run w/ HDF5"
echo
# turns on HDF5
echo "turning on HDF5"
sed -i "s:^HDF5_ENABLED .*:HDF5_ENABLED = .true.:" DATA/Par_file
sed -i "s:^HDF5_FOR_MOVIES .*:HDF5_FOR_MOVIES = .true.:" DATA/Par_file
sed -i "s:^HDF5_IO_NODES .*:HDF5_IO_NODES = 1:" DATA/Par_file
# replaces run script
cp -v run_this_example_HDF5_IO_server.sh run_this_example.sh
fi

# adios
## adios
if [ "${ADIOS2}" == "true" ]; then
# turns on ADIOS
echo "turning on ADIOS"
sed -i "s:^ADIOS_ENABLED .*:ADIOS_ENABLED = .true.:" DATA/Par_file
fi

# default script
./run_this_example.sh
## GPU
# switches GPU_MODE to .true. in DATA/Par_file when GPU is set to "true"
case "${GPU}" in
  true)
    # turns on GPU
    echo "turning on GPU"
    sed -i "s:^GPU_MODE .*:GPU_MODE = .true.:" DATA/Par_file
    ;;
esac

# save Par_file state
cp -v DATA/Par_file DATA/Par_file.bak

# runs the example and keeps the exit status of the run script itself
if [ "${RUN_KERNEL}" == "true" ]; then
  # use kernel script
  # the output is duplicated into output.log by tee.
  # note: $? after a pipeline is the status of its last command (tee), which would hide a failing run;
  #       PIPESTATUS[0] holds the status of the run script and must be read right after the pipeline.
  ./run_this_example_kernel.sh | tee output.log
  run_status=${PIPESTATUS[0]}
else
  # default script
  ./run_this_example.sh
  run_status=$?
fi
# checks exit code
if [[ ${run_status} -ne 0 ]]; then exit 1; fi

Expand All @@ -136,15 +209,52 @@ echo `date`
echo

# seismogram comparison
if [ "${DEBUG}" == "true" ]; then
if [ "${DEBUG}" == "true" ] || [ "${RUN_KERNEL}" == "true" ]; then
# no comparisons
continue
: # do nothing
else
my_test
fi
# checks exit code
if [[ $? -ne 0 ]]; then exit 1; fi

# kernel test
if [ "${RUN_KERNEL}" == "true" ]; then
# check kernel values
my_kernel_test
# checks exit code
if [[ $? -ne 0 ]]; then exit 1; fi
# clean up
rm -rf OUTPUT_FILES/ SEM/ output.log

# re-run kernel test w/ UNDO_ATT
echo
echo "*****************************************"
echo "run kernel w/ UNDO_ATTENUATION_AND_OR_PML"
echo "*****************************************"
echo

# turns on UNDO_ATTENUATION_AND_OR_PML
echo "turning on UNDO_ATTENUATION_AND_OR_PML"
sed -i "s:^UNDO_ATTENUATION_AND_OR_PML .*:UNDO_ATTENUATION_AND_OR_PML = .true.:" DATA/Par_file

# use kernel script
./run_this_example_kernel.sh | tee output.log
# checks exit code
if [[ $? -ne 0 ]]; then exit 1; fi
# kernel test
my_kernel_test
# checks exit code
if [[ $? -ne 0 ]]; then exit 1; fi
fi

# restore original Par_file
cp -v DATA/Par_file.bak DATA/Par_file

# cleanup
rm -rf OUTPUT_FILES/ DATABASES_MPI/
rm -rf OUTPUT_FILES/
if [ -e DATABASES_MPI ]; then rm -rf DATABASES_MPI/; fi
if [ -e SEM ]; then rm -rf SEM/; fi

echo
echo "all good"
Expand Down
Loading