diff --git a/paper/.latexmkrc b/paper/.latexmkrc new file mode 100644 index 0000000..ddcb138 --- /dev/null +++ b/paper/.latexmkrc @@ -0,0 +1,6 @@ + +sub build_header { + system("ruby ./prep.rb") +} + +build_header() diff --git a/paper/README.md b/paper/README.md new file mode 100644 index 0000000..4a1e715 --- /dev/null +++ b/paper/README.md @@ -0,0 +1,27 @@ +# JuliaCon-proceeding-IGG + +## Paper dependencies + +The document can be built locally; the following dependencies need to be installed: +- Ruby +- latexmk + +## Build process + +Build the paper using: +``` +$ latexmk -bibtex -pdf paper.tex +``` + +Clean up temporary files using: +``` +$ latexmk -c +``` + +## Paper metadata + +**IMPORTANT** +Some information for building the document (such as the title and keywords) +is provided through the `paper.yml` file and not through the usual `\title` +command. Respecting the process is important to avoid build errors when +submitting your work. diff --git a/paper/bib.tex b/paper/bib.tex new file mode 100644 index 0000000..8e1819a --- /dev/null +++ b/paper/bib.tex @@ -0,0 +1,4 @@ +% **************GENERATED FILE, DO NOT EDIT************** + +\bibliographystyle{juliacon} +\bibliography{ref.bib} diff --git a/paper/codes/benchmarks/Project.toml b/paper/codes/benchmarks/Project.toml new file mode 100644 index 0000000..d53140d --- /dev/null +++ b/paper/codes/benchmarks/Project.toml @@ -0,0 +1,4 @@ +[deps] +ImplicitGlobalGrid = "4d7a3746-15be-11ea-1130-334b0c4f5fa0" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +ParallelStencil = "94395366-693c-11ea-3b26-d9b7aac5d958" diff --git a/paper/codes/benchmarks/daint_run_all.sh b/paper/codes/benchmarks/daint_run_all.sh new file mode 100644 index 0000000..4e6c560 --- /dev/null +++ b/paper/codes/benchmarks/daint_run_all.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +sbatch daint_runme_1.sh +sbatch daint_runme_8.sh +sbatch daint_runme_27.sh +sbatch daint_runme_64.sh +sbatch daint_runme_125.sh +sbatch daint_runme_216.sh +sbatch daint_runme_512.sh 
+sbatch daint_runme_1000.sh +sbatch daint_runme_2197.sh diff --git a/paper/codes/benchmarks/daint_runme_1.sh b/paper/codes/benchmarks/daint_runme_1.sh new file mode 100644 index 0000000..31331a7 --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_1.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_1" +#SBATCH --output=diff3D_1.%j.o +#SBATCH --error=diff3D_1.%j.e +#SBATCH --time=00:30:00 +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_1000.sh b/paper/codes/benchmarks/daint_runme_1000.sh new file mode 100644 index 0000000..9c36343 --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_1000.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_1000" +#SBATCH --output=diff3D_1000.%j.o +#SBATCH --error=diff3D_1000.%j.e +#SBATCH --time=00:50:00 +#SBATCH --nodes=1000 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_125.sh b/paper/codes/benchmarks/daint_runme_125.sh new file mode 100644 index 0000000..2e90adb --- /dev/null 
+++ b/paper/codes/benchmarks/daint_runme_125.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_125" +#SBATCH --output=diff3D_125.%j.o +#SBATCH --error=diff3D_125.%j.e +#SBATCH --time=00:30:00 +#SBATCH --nodes=125 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_216.sh b/paper/codes/benchmarks/daint_runme_216.sh new file mode 100644 index 0000000..99c597c --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_216.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_216" +#SBATCH --output=diff3D_216.%j.o +#SBATCH --error=diff3D_216.%j.e +#SBATCH --time=00:45:00 +#SBATCH --nodes=216 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_2197.sh b/paper/codes/benchmarks/daint_runme_2197.sh new file mode 100644 index 0000000..cc31492 --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_2197.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_2197" +#SBATCH --output=diff3D_2197.%j.o +#SBATCH --error=diff3D_2197.%j.e +#SBATCH 
--time=00:50:00 +#SBATCH --nodes=2197 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_27.sh b/paper/codes/benchmarks/daint_runme_27.sh new file mode 100644 index 0000000..8b35a21 --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_27.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_27" +#SBATCH --output=diff3D_27.%j.o +#SBATCH --error=diff3D_27.%j.e +#SBATCH --time=00:30:00 +#SBATCH --nodes=27 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_512.sh b/paper/codes/benchmarks/daint_runme_512.sh new file mode 100644 index 0000000..39dfb9a --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_512.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_512" +#SBATCH --output=diff3D_512.%j.o +#SBATCH --error=diff3D_512.%j.e +#SBATCH --time=00:50:00 +#SBATCH --nodes=512 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda 
+module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_64.sh b/paper/codes/benchmarks/daint_runme_64.sh new file mode 100644 index 0000000..d33fd16 --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_64.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_64" +#SBATCH --output=diff3D_64.%j.o +#SBATCH --error=diff3D_64.%j.e +#SBATCH --time=00:30:00 +#SBATCH --nodes=64 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + +pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_runme_8.sh b/paper/codes/benchmarks/daint_runme_8.sh new file mode 100644 index 0000000..22e1c73 --- /dev/null +++ b/paper/codes/benchmarks/daint_runme_8.sh @@ -0,0 +1,30 @@ +#!/bin/bash -l +#SBATCH --job-name="diff3D_8" +#SBATCH --output=diff3D_8.%j.o +#SBATCH --error=diff3D_8.%j.e +#SBATCH --time=00:30:00 +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --partition=normal +#SBATCH --constraint=gpu +#SBATCH --account c23 + +module load daint-gpu +module load Julia/1.7.2-CrayGNU-21.09-cuda +module load cray-hdf5-parallel + +export JULIA_HDF5_PATH=$HDF5_ROOT +export JULIA_CUDA_MEMORY_POOL=none + +export IGG_CUDAAWARE_MPI=1 +export MPICH_RDMA_ENABLED_CUDA=1 + +scp diff3D.jl daint_submit_pareff.sh $SCRATCH/diff3D + 
+pushd $SCRATCH/diff3D + +chmod +x *.sh + +srun daint_submit_pareff.sh + +scp out_diff3D_pareff* $HOME/diff3D diff --git a/paper/codes/benchmarks/daint_submit_pareff.sh b/paper/codes/benchmarks/daint_submit_pareff.sh new file mode 100644 index 0000000..0b9c8d3 --- /dev/null +++ b/paper/codes/benchmarks/daint_submit_pareff.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +RUN="diff3D" + +for ie in {1..20}; do + + # RDMA CUDA-aware + LD_PRELOAD="/usr/lib64/libcuda.so:/usr/local/cuda/lib64/libcudart.so" julia --project --check-bounds=no -O3 "$RUN".jl + + # no RDMA + # julia --project --check-bounds=no -O3 "$RUN".jl + +done diff --git a/paper/codes/benchmarks/diff3D.jl b/paper/codes/benchmarks/diff3D.jl new file mode 100644 index 0000000..4e3def9 --- /dev/null +++ b/paper/codes/benchmarks/diff3D.jl @@ -0,0 +1,72 @@ +const USE_GPU = true +using ImplicitGlobalGrid +using ParallelStencil +using ParallelStencil.FiniteDifferences3D +@static if USE_GPU + @init_parallel_stencil(CUDA, Float64, 3); +else + @init_parallel_stencil(Threads, Float64, 3); +end + +@parallel function diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz) + @inn(T2) = @inn(T) + dt*(lam*@inn(Ci)*(@d2_xi(T)*_dx^2 + @d2_yi(T)*_dy^2 + @d2_zi(T)*_dz^2)); + return +end + +function diffusion3D() +# Physics +lam = 1.0; # Thermal conductivity +c0 = 2.0; # Heat capacity +lx, ly, lz = 1.0, 1.0, 1.0; # Length of computational domain in dimension x, y and z + +# Numerics +nx, ny, nz = 512, 512, 512; # Number of gridpoints in dimensions x, y and z +nt = 100; # Number of time steps +me, dims, nprocs = init_global_grid(nx, ny, nz); +dx = lx/(nx_g()-1); # Space step in x-dimension +dy = ly/(ny_g()-1); # Space step in y-dimension +dz = lz/(nz_g()-1); # Space step in z-dimension +_dx, _dy, _dz = 1.0/dx, 1.0/dy, 1.0/dz; + +# Array initializations +T = @zeros(nx, ny, nz); +T2 = @zeros(nx, ny, nz); +Ci = @zeros(nx, ny, nz); + +# Initial conditions +Ci .= 1/c0; # 1/Heat capacity +T .= 1.7; +T2 .= T; # Assign also T2 to get correct boundary 
conditions. + +GC.enable(false) # uncomment for prof, mtp + +# Time loop +dt = min(dx^2,dy^2,dz^2)/lam/maximum(Ci)/8.1; # Time step for 3D Heat diffusion +for it = 1:nt + if (it == 11) tic(); end # Start measuring time. + @hide_communication (16, 4, 2) begin + @parallel diffusion3D_step!(T2, T, Ci, lam, dt, _dx, _dy, _dz); + update_halo!(T2); + end + T, T2 = T2, T; +end +time_s = toc() + +GC.enable(true) # uncomment for prof, mtp + +# Performance +A_eff = (2*1+1)*1/1e9*nx*ny*nz*sizeof(Data.Number); # Effective main memory access per iteration [GB] (Lower bound of required memory access: T has to be read and written: 2 whole-array memaccess; Ci has to be read: : 1 whole-array memaccess) +t_it = time_s/(nt-10); # Execution time per iteration [s] +T_eff = A_eff/t_it; # Effective memory throughput [GB/s] +if (me==0) println("time_s=$time_s T_eff=$T_eff"); end + +if me==0 + open("./out_diff3D_pareff_nordma_$(nprocs).txt","a") do io + println(io, "$(nprocs) $(nx) $(ny) $(nz) $(nt-10) $(time_s) $(A_eff) $(t_it) $(T_eff)") + end +end + +finalize_global_grid(); +end + +diffusion3D() diff --git a/paper/codes/diffusion3D_multixpu_hidecomm_clean.jl b/paper/codes/diffusion3D_multixpu_hidecomm_clean.jl new file mode 100644 index 0000000..4fa7273 --- /dev/null +++ b/paper/codes/diffusion3D_multixpu_hidecomm_clean.jl @@ -0,0 +1,46 @@ +using ImplicitGlobalGrid +using ParallelStencil +using ParallelStencil.FiniteDifferences3D +@init_parallel_stencil(CUDA, Float64, 3) + +@parallel function step!(T2,T,Ci,lam,dt,dx,dy,dz) + @inn(T2) = @inn(T) + dt*( + lam*@inn(Ci)*(@d2_xi(T)/dx^2 + + @d2_yi(T)/dy^2 + + @d2_zi(T)/dz^2 ) ) + return +end + +function diffusion3D() + # Physics + lam = 1.0 #Thermal conductivity + c0 = 2.0 #Heat capacity + lx=ly=lz = 1.0 #Domain length x|y|z + + # Numerics + nx=ny=nz = 512 #Nb gridpoints x|y|z + nt = 100 #Nb time steps + me, = init_global_grid(nx, ny, nz) + dx = lx/(nx_g()-1) #Space step in x + dy = ly/(ny_g()-1) #Space step in y + dz = lz/(nz_g()-1) #Space 
step in z + + # Initial conditions + T = @ones(nx,ny,nz).*1.7 #Temperature + T2 = copy(T) #Temperature (2nd) + Ci = @ones(nx,ny,nz)./c0 #1/Heat capacity + + # Time loop + dt = min(dx^2,dy^2,dz^2)/lam/maximum(Ci)/6.1 + for it = 1:nt + @hide_communication (16, 2, 2) begin + @parallel step!(T2,T,Ci,lam,dt,dx,dy,dz) + update_halo!(T2) + end + T, T2 = T2, T + end + + finalize_global_grid() +end + +diffusion3D() \ No newline at end of file diff --git a/paper/data/data_raw/out_diff3D_pareff_1.txt b/paper/data/data_raw/out_diff3D_pareff_1.txt new file mode 100644 index 0000000..1c29abc --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_1.txt @@ -0,0 +1,20 @@ +1 512 512 512 90 0.9340479373931885 3.221225472 0.010378310415479873 310.3805285295137 +1 512 512 512 90 0.9466681480407715 3.221225472 0.010518534978230795 306.2427874857727 +1 512 512 512 90 0.9535219669342041 3.221225472 0.010594688521491157 304.0415454843996 +1 512 512 512 90 0.9361491203308105 3.221225472 0.010401656892564561 309.68388068083993 +1 512 512 512 90 0.9420769214630127 3.221225472 0.01046752134958903 307.7352664894703 +1 512 512 512 90 0.93709397315979 3.221225472 0.010412155257331 309.37163271091225 +1 512 512 512 90 0.9356751441955566 3.221225472 0.01039639049106174 309.84075432424714 +1 512 512 512 90 0.9484589099884033 3.221225472 0.010538432333204482 305.6645780084924 +1 512 512 512 90 0.943687915802002 3.221225472 0.01048542128668891 307.2099235620889 +1 512 512 512 90 0.9328708648681641 3.221225472 0.01036523183186849 310.7721587177781 +1 512 512 512 90 0.938805103302002 3.221225472 0.010431167814466688 308.80775089559717 +1 512 512 512 90 0.9378070831298828 3.221225472 0.010420078701443142 309.13638603841565 +1 512 512 512 90 0.9503159523010254 3.221225472 0.01055906613667806 305.0672692361235 +1 512 512 512 90 0.9375438690185547 3.221225472 0.010417154100206162 309.22317564028833 +1 512 512 512 90 0.9450640678405762 3.221225472 0.01050071186489529 306.76258080833657 +1 512 512 512 90 
0.9377551078796387 3.221225472 0.010419501198662652 309.1535199797708 +1 512 512 512 90 0.9454519748687744 3.221225472 0.010505021942986383 306.6367199880656 +1 512 512 512 90 0.9402899742126465 3.221225472 0.010447666380140516 308.3200931954602 +1 512 512 512 90 0.9480838775634766 3.221225472 0.010534265306260851 305.7854893862909 +1 512 512 512 90 0.9321670532226562 3.221225472 0.010357411702473958 311.0068002057485 diff --git a/paper/data/data_raw/out_diff3D_pareff_1000.txt b/paper/data/data_raw/out_diff3D_pareff_1000.txt new file mode 100644 index 0000000..b69576e --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_1000.txt @@ -0,0 +1,20 @@ +1000 512 512 512 90 1.0334599018096924 3.221225472 0.01148288779788547 280.5239874061276 +1000 512 512 512 90 1.0170190334320068 3.221225472 0.011300211482577854 285.0588661076244 +1000 512 512 512 90 1.0364751815795898 3.221225472 0.011516390906439886 279.7078961776743 +1000 512 512 512 90 1.0034151077270508 3.221225472 0.011149056752522786 288.92358730446927 +1000 512 512 512 90 1.0056180953979492 3.221225472 0.011173534393310546 288.2906481165447 +1000 512 512 512 90 1.0111370086669922 3.221225472 0.011234855651855469 286.71712141383904 +1000 512 512 512 90 1.0050580501556396 3.221225472 0.011167311668395996 288.4512913807372 +1000 512 512 512 90 1.020353078842163 3.221225472 0.01133725643157959 284.12742460577783 +1000 512 512 512 90 0.9975872039794922 3.221225472 0.011084302266438802 290.6114786993196 +1000 512 512 512 90 1.010585069656372 3.221225472 0.011228722996181911 286.87371423226926 +1000 512 512 512 90 1.0133888721466064 3.221225472 0.011259876357184517 286.0800038842925 +1000 512 512 512 90 0.9983890056610107 3.221225472 0.011093211174011231 290.3780899390583 +1000 512 512 512 90 0.9993908405303955 3.221225472 0.01110434267255995 290.0870017241094 +1000 512 512 512 90 0.9988658428192139 3.221225472 0.011098509364657932 290.23946965866094 +1000 512 512 512 90 1.0087361335754395 3.221225472 
0.011208179261949327 287.3995317808438 +1000 512 512 512 90 1.0233879089355469 3.221225472 0.011370976765950521 283.2848521549795 +1000 512 512 512 90 1.0156311988830566 3.221225472 0.01128479109870063 285.44839189543376 +1000 512 512 512 90 0.9957358837127686 3.221225472 0.011063732041252983 291.1517976022123 +1000 512 512 512 90 0.9934961795806885 3.221225472 0.011038846439785428 291.8081603518179 +1000 512 512 512 90 0.9957840442657471 3.221225472 0.011064267158508301 291.1377162040879 diff --git a/paper/data/data_raw/out_diff3D_pareff_125.txt b/paper/data/data_raw/out_diff3D_pareff_125.txt new file mode 100644 index 0000000..b371875 --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_125.txt @@ -0,0 +1,20 @@ +125 512 512 512 90 0.9970932006835938 3.221225472 0.01107881334092882 290.7554602531051 +125 512 512 512 90 1.0150079727172852 3.221225472 0.011277866363525391 285.6236603776412 +125 512 512 512 90 1.0006930828094482 3.221225472 0.011118812031216091 289.70949980595066 +125 512 512 512 90 1.0041658878326416 3.221225472 0.011157398753696018 288.70756913056744 +125 512 512 512 90 1.0230579376220703 3.221225472 0.011367310418023004 283.37622124691075 +125 512 512 512 90 1.0218260288238525 3.221225472 0.01135362254248725 283.71785832632787 +125 512 512 512 90 1.0313291549682617 3.221225472 0.011459212832980686 281.1035556237346 +125 512 512 512 90 1.0081279277801514 3.221225472 0.011201421419779459 287.57292055024044 +125 512 512 512 90 1.0158588886260986 3.221225472 0.01128732098473443 285.38441286081576 +125 512 512 512 90 0.9918308258056641 3.221225472 0.011020342508951822 292.29812679446206 +125 512 512 512 90 1.0006229877471924 3.221225472 0.011118033197191027 289.7297943681121 +125 512 512 512 90 1.0198469161987305 3.221225472 0.011331632402208117 284.26844056221785 +125 512 512 512 90 1.0079660415649414 3.221225472 0.011199622684054904 287.6191067209893 +125 512 512 512 90 0.996452808380127 3.221225472 0.0110716978708903 290.94232064164646 +125 512 
512 512 90 1.016430139541626 3.221225472 0.011293668217129177 285.2240219979499 +125 512 512 512 90 1.0135829448699951 3.221225472 0.011262032720777724 286.0252275823215 +125 512 512 512 90 0.9945950508117676 3.221225472 0.011051056120130751 291.4857581921218 +125 512 512 512 90 1.0017030239105225 3.221225472 0.011130033599005804 289.41740771454073 +125 512 512 512 90 1.0160350799560547 3.221225472 0.011289278666178385 285.3349241568895 +125 512 512 512 90 0.9946188926696777 3.221225472 0.011051321029663086 291.47877103143054 diff --git a/paper/data/data_raw/out_diff3D_pareff_216.txt b/paper/data/data_raw/out_diff3D_pareff_216.txt new file mode 100644 index 0000000..85727d3 --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_216.txt @@ -0,0 +1,20 @@ +216 512 512 512 90 1.0110080242156982 3.221225472 0.011233422491285536 286.75370079767805 +216 512 512 512 90 1.003983974456787 3.221225472 0.011155377493964301 288.7598804919751 +216 512 512 512 90 0.9931049346923828 3.221225472 0.011034499274359809 291.92312146732064 +216 512 512 512 90 0.9962389469146729 3.221225472 0.011069321632385254 291.00477689398207 +216 512 512 512 90 0.9913389682769775 3.221225472 0.01101487742529975 292.4431518957498 +216 512 512 512 90 0.9879758358001709 3.221225472 0.010977509286668565 293.4386469535451 +216 512 512 512 90 0.9968409538269043 3.221225472 0.011076010598076714 290.82903482950326 +216 512 512 512 90 0.9944860935211182 3.221225472 0.01104984548356798 291.5176937804447 +216 512 512 512 90 0.9922730922698975 3.221225472 0.01102525658077664 292.1678464713872 +216 512 512 512 90 0.9938778877258301 3.221225472 0.011043087641398112 291.69608868486495 +216 512 512 512 90 1.0026028156280518 3.221225472 0.01114003128475613 289.15766838176495 +216 512 512 512 90 1.000662088394165 3.221225472 0.011118467648824055 289.71847324129175 +216 512 512 512 90 0.9929680824279785 3.221225472 0.011032978693644205 291.9633547244734 +216 512 512 512 90 0.9897878170013428 3.221225472 
0.010997642411126031 292.901455746658 +216 512 512 512 90 0.9945991039276123 3.221225472 0.011051101154751247 291.48457035117127 +216 512 512 512 90 0.994642972946167 3.221225472 0.011051588588290744 291.4717143391419 +216 512 512 512 90 0.9960529804229736 3.221225472 0.01106725533803304 291.0591084792394 +216 512 512 512 90 0.991070032119751 3.221225472 0.01101188924577501 292.5225090904274 +216 512 512 512 90 0.996103048324585 3.221225472 0.011067811648050945 291.0444787490815 +216 512 512 512 90 0.9962210655212402 3.221225472 0.011069122950236003 291.01000020343264 diff --git a/paper/data/data_raw/out_diff3D_pareff_2197.txt b/paper/data/data_raw/out_diff3D_pareff_2197.txt new file mode 100644 index 0000000..66f2021 --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_2197.txt @@ -0,0 +1,20 @@ +2197 512 512 512 90 1.006438970565796 3.221225472 0.011182655228508843 288.05551152000737 +2197 512 512 512 90 1.000309944152832 3.221225472 0.011114554935031468 289.8204643217124 +2197 512 512 512 90 1.021346092224121 3.221225472 0.011348289913601345 283.8511790344061 +2197 512 512 512 90 1.0026910305023193 3.221225472 0.011141011450025771 289.13222883300676 +2197 512 512 512 90 1.0015809535980225 3.221225472 0.01112867726220025 289.4526812221646 +2197 512 512 512 90 1.0017869472503662 3.221225472 0.011130966080559624 289.3931621645952 +2197 512 512 512 90 0.9995851516723633 3.221225472 0.01110650168524848 290.0306111940173 +2197 512 512 512 90 1.015455961227417 3.221225472 0.011282844013637966 285.4976518426022 +2197 512 512 512 90 1.037642002105713 3.221225472 0.011529355578952365 279.3933667793688 +2197 512 512 512 90 1.0024449825286865 3.221225472 0.011138277583652073 289.2031957192262 +2197 512 512 512 90 1.0173060894012451 3.221225472 0.011303400993347168 284.9784302880092 +2197 512 512 512 90 1.056675910949707 3.221225472 0.011740843454996744 274.36065256700874 +2197 512 512 512 90 1.0010077953338623 3.221225472 0.011122308837042915 289.6184163913602 +2197 512 
512 512 90 1.015197992324829 3.221225472 0.0112799776924981 285.57019879058083 +2197 512 512 512 90 0.9989988803863525 3.221225472 0.01109998755984836 290.2008182110076 +2197 512 512 512 90 0.9983718395233154 3.221225472 0.01109302043914795 290.3830827384125 +2197 512 512 512 90 1.011354923248291 3.221225472 0.011237276924981011 286.65534306083174 +2197 512 512 512 90 1.0115690231323242 3.221225472 0.01123965581258138 286.59467208900145 +2197 512 512 512 90 0.9956479072570801 3.221225472 0.011062754525078667 291.17752406940383 +2197 512 512 512 90 1.017198085784912 3.221225472 0.01130220095316569 285.00868860394405 diff --git a/paper/data/data_raw/out_diff3D_pareff_27.txt b/paper/data/data_raw/out_diff3D_pareff_27.txt new file mode 100644 index 0000000..4f2fb6f --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_27.txt @@ -0,0 +1,20 @@ +27 512 512 512 90 0.9870240688323975 3.221225472 0.010966934098137749 293.7216037932592 +27 512 512 512 90 0.9850809574127197 3.221225472 0.010945343971252441 294.30098135430325 +27 512 512 512 90 0.9811930656433105 3.221225472 0.010902145173814562 295.4671232719351 +27 512 512 512 90 0.9798460006713867 3.221225472 0.01088717778523763 295.87332323789104 +27 512 512 512 90 0.9858770370483398 3.221225472 0.010954189300537109 294.0633381095629 +27 512 512 512 90 0.9844610691070557 3.221225472 0.010938456323411729 294.4862946616669 +27 512 512 512 90 0.9826450347900391 3.221225472 0.010918278164333767 295.0305371887875 +27 512 512 512 90 0.9798588752746582 3.221225472 0.01088732083638509 295.8694356865799 +27 512 512 512 90 0.9865620136260986 3.221225472 0.010961800151401095 293.8591679750953 +27 512 512 512 90 0.9891538619995117 3.221225472 0.010990598466661242 293.0891781526938 +27 512 512 512 90 0.9829168319702148 3.221225472 0.010921298133002386 294.94895503914324 +27 512 512 512 90 0.9864559173583984 3.221225472 0.010960621303982204 293.8907734025686 +27 512 512 512 90 0.9825069904327393 3.221225472 0.010916744338141547 
295.071989617408 +27 512 512 512 90 0.984705924987793 3.221225472 0.01094117694430881 294.41306802697864 +27 512 512 512 90 0.979964017868042 3.221225472 0.010888489087422688 295.83769117432854 +27 512 512 512 90 0.9847640991210938 3.221225472 0.010941823323567709 294.3956758159098 +27 512 512 512 90 0.9835910797119141 3.221225472 0.010928789774576822 294.74676871298226 +27 512 512 512 90 0.9791169166564941 3.221225472 0.010879076851738823 296.09364065528644 +27 512 512 512 90 0.9816069602966309 3.221225472 0.010906744003295898 295.3425395357754 +27 512 512 512 90 0.9833881855010986 3.221225472 0.01092653539445665 294.80758133399 diff --git a/paper/data/data_raw/out_diff3D_pareff_512.txt b/paper/data/data_raw/out_diff3D_pareff_512.txt new file mode 100644 index 0000000..3c43aa8 --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_512.txt @@ -0,0 +1,20 @@ +512 512 512 512 90 0.9991440773010254 3.221225472 0.011101600858900282 290.1586458512879 +512 512 512 512 90 0.9981718063354492 3.221225472 0.011090797848171657 290.44127537957297 +512 512 512 512 90 0.9976179599761963 3.221225472 0.011084643999735515 290.6025193120194 +512 512 512 512 90 1.0039019584655762 3.221225472 0.011154466205173068 288.7834713691726 +512 512 512 512 90 1.0002031326293945 3.221225472 0.011113368140326606 289.8514142000998 +512 512 512 512 90 1.0193839073181152 3.221225472 0.011326487859090168 284.3975566013412 +512 512 512 512 90 1.0015339851379395 3.221225472 0.01112815539042155 289.46625554605737 +512 512 512 512 90 1.0001678466796875 3.221225472 0.01111297607421875 289.86164016612935 +512 512 512 512 90 0.998852014541626 3.221225472 0.011098355717129178 290.24348778336304 +512 512 512 512 90 0.9998509883880615 3.221225472 0.011109455426534017 289.953498918261 +512 512 512 512 90 0.9953110218048096 3.221225472 0.011059011353386774 291.276079666336 +512 512 512 512 90 0.9934980869293213 3.221225472 0.011038867632548015 291.80760012940476 +512 512 512 512 90 0.9922981262207031 
3.221225472 0.01102553473578559 292.16047558626474 +512 512 512 512 90 0.9951930046081543 3.221225472 0.011057700051201715 291.3106212941567 +512 512 512 512 90 0.9982829093933105 3.221225472 0.01109203232659234 290.40895096179503 +512 512 512 512 90 0.9964919090270996 3.221225472 0.01107213232252333 290.93090456002477 +512 512 512 512 90 0.9925789833068848 3.221225472 0.011028655370076497 292.0778067596519 +512 512 512 512 90 1.0022060871124268 3.221225472 0.011135623190138074 289.2721329555027 +512 512 512 512 90 0.991750955581665 3.221225472 0.0110194550620185 292.3216668946557 +512 512 512 512 90 0.9951310157775879 3.221225472 0.011057011286417643 291.3287676532384 diff --git a/paper/data/data_raw/out_diff3D_pareff_64.txt b/paper/data/data_raw/out_diff3D_pareff_64.txt new file mode 100644 index 0000000..dad2536 --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_64.txt @@ -0,0 +1,20 @@ +64 512 512 512 90 0.9891541004180908 3.221225472 0.010990601115756565 293.08910750858956 +64 512 512 512 90 0.9896070957183838 3.221225472 0.010995634396870931 292.9549451841247 +64 512 512 512 90 0.992056131362915 3.221225472 0.011022845904032389 292.23174305844265 +64 512 512 512 90 0.9884791374206543 3.221225472 0.010983101526896159 293.28923748101994 +64 512 512 512 90 0.994920015335083 3.221225472 0.011054666837056477 291.39055201574166 +64 512 512 512 90 0.9878499507904053 3.221225472 0.010976110564337836 293.47604081777297 +64 512 512 512 90 0.9888169765472412 3.221225472 0.010986855294969346 293.18903230435126 +64 512 512 512 90 0.9844260215759277 3.221225472 0.010938066906399197 294.49677896150524 +64 512 512 512 90 0.9901220798492432 3.221225472 0.011001356442769368 292.80257291519246 +64 512 512 512 90 0.9839041233062744 3.221225472 0.010932268036736383 294.65299068551144 +64 512 512 512 90 0.9850249290466309 3.221225472 0.010944721433851454 294.3177212383786 +64 512 512 512 90 0.9897830486297607 3.221225472 0.010997589429219563 292.90286682657074 +64 512 512 
512 90 0.9830901622772217 3.221225472 0.010923224025302464 294.8969520846941 +64 512 512 512 90 0.9945449829101562 3.221225472 0.011050499810112847 291.50043232000246 +64 512 512 512 90 0.9882810115814209 3.221225472 0.010980900128682454 293.34803470127724 +64 512 512 512 90 0.9904758930206299 3.221225472 0.01100528770022922 292.69797934795537 +64 512 512 512 90 0.9823501110076904 3.221225472 0.010915001233418783 295.119112047141 +64 512 512 512 90 0.9806761741638184 3.221225472 0.010896401935153538 295.6228570834755 +64 512 512 512 90 0.9884629249572754 3.221225472 0.01098292138841417 293.2940479204426 +64 512 512 512 90 0.9870820045471191 3.221225472 0.010967577828301324 293.7043641201959 diff --git a/paper/data/data_raw/out_diff3D_pareff_8.txt b/paper/data/data_raw/out_diff3D_pareff_8.txt new file mode 100644 index 0000000..8d606be --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_8.txt @@ -0,0 +1,20 @@ +8 512 512 512 90 0.9687850475311279 3.221225472 0.010764278305901421 299.25141105223855 +8 512 512 512 90 0.9645669460296631 3.221225472 0.0107174105114407 300.5600530614538 +8 512 512 512 90 0.9632339477539062 3.221225472 0.010702599419487847 300.975991508626 +8 512 512 512 90 0.9634730815887451 3.221225472 0.010705256462097168 300.90128932501625 +8 512 512 512 90 0.9681990146636963 3.221225472 0.010757766829596626 299.4325423690916 +8 512 512 512 90 0.9681999683380127 3.221225472 0.01075777742597792 299.4322474288577 +8 512 512 512 90 0.9731669425964355 3.221225472 0.010812966028849284 297.9039667197403 +8 512 512 512 90 0.9741618633270264 3.221225472 0.010824020703633625 297.5997145791336 +8 512 512 512 90 0.9661529064178467 3.221225472 0.01073503229353163 300.06667739052284 +8 512 512 512 90 0.9670360088348389 3.221225472 0.01074484454260932 299.7926549077596 +8 512 512 512 90 0.9747970104217529 3.221225472 0.010831077893575033 297.4058079584879 +8 512 512 512 90 0.9715781211853027 3.221225472 0.010795312457614475 298.3911289874623 +8 512 512 512 90 
0.9669270515441895 3.221225472 0.01074363390604655 299.8264367689488 +8 512 512 512 90 0.9685518741607666 3.221225472 0.010761687490675185 299.32345413218286 +8 512 512 512 90 0.9622619152069092 3.221225472 0.010691799057854546 301.28002355539803 +8 512 512 512 90 0.9672901630401611 3.221225472 0.010747668478224013 299.7138847859483 +8 512 512 512 90 0.9680109024047852 3.221225472 0.010755676693386501 299.4907306930006 +8 512 512 512 90 0.9707579612731934 3.221225472 0.010786199569702148 298.64322935839687 +8 512 512 512 90 0.9707150459289551 3.221225472 0.010785722732543945 298.65643238542947 +8 512 512 512 90 0.9659500122070312 3.221225472 0.010732777913411458 300.12970528112976 diff --git a/paper/data/data_raw/out_diff3D_pareff_noMPI_1.txt b/paper/data/data_raw/out_diff3D_pareff_noMPI_1.txt new file mode 100644 index 0000000..bfb86ef --- /dev/null +++ b/paper/data/data_raw/out_diff3D_pareff_noMPI_1.txt @@ -0,0 +1,20 @@ +1 512 512 512 90 0.9372940063476562 3.221225472 0.010414377848307291 309.30560797000123 +1 512 512 512 90 0.9418549537658691 3.221225472 0.01046505504184299 307.80779070156836 +1 512 512 512 90 0.939877986907959 3.221225472 0.010443088743421767 308.4552426147954 +1 512 512 512 90 0.9386570453643799 3.221225472 0.010429522726270887 308.8564603139573 +1 512 512 512 90 0.9374549388885498 3.221225472 0.010416165987650553 309.25250959125435 +1 512 512 512 90 0.9442510604858398 3.221225472 0.010491678449842666 307.02670572679494 +1 512 512 512 90 0.9386410713195801 3.221225472 0.010429345236884224 308.86171651580537 +1 512 512 512 90 0.9295392036437988 3.221225472 0.010328213373819986 311.88603056605905 +1 512 512 512 90 0.9415910243988037 3.221225472 0.010462122493320042 307.89406968392115 +1 512 512 512 90 0.9325551986694336 3.221225472 0.010361724429660372 310.87735384848315 +1 512 512 512 90 0.9326050281524658 3.221225472 0.010362278090582953 310.8607435393372 +1 512 512 512 90 0.9398880004882812 3.221225472 0.010443200005425347 308.45195632818877 
+1 512 512 512 90 0.9335529804229736 3.221225472 0.010372810893588595 310.5450880234431 +1 512 512 512 90 0.9386279582977295 3.221225472 0.010429199536641438 308.86603144207805 +1 512 512 512 90 0.931318998336792 3.221225472 0.0103479888704088 311.2900015974548 +1 512 512 512 90 0.9340448379516602 3.221225472 0.010378275977240668 310.38155846540184 +1 512 512 512 90 0.9439029693603516 3.221225472 0.010487810770670573 307.13993057619217 +1 512 512 512 90 0.9443621635437012 3.221225472 0.010492912928263346 306.9905844089699 +1 512 512 512 90 0.9342830181121826 3.221225472 0.010380922423468695 310.3024317682605 +1 512 512 512 90 0.9396378993988037 3.221225472 0.010440421104431153 308.53405622047546 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_noMPI_1.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_noMPI_1.txt new file mode 100644 index 0000000..bfb86ef --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_noMPI_1.txt @@ -0,0 +1,20 @@ +1 512 512 512 90 0.9372940063476562 3.221225472 0.010414377848307291 309.30560797000123 +1 512 512 512 90 0.9418549537658691 3.221225472 0.01046505504184299 307.80779070156836 +1 512 512 512 90 0.939877986907959 3.221225472 0.010443088743421767 308.4552426147954 +1 512 512 512 90 0.9386570453643799 3.221225472 0.010429522726270887 308.8564603139573 +1 512 512 512 90 0.9374549388885498 3.221225472 0.010416165987650553 309.25250959125435 +1 512 512 512 90 0.9442510604858398 3.221225472 0.010491678449842666 307.02670572679494 +1 512 512 512 90 0.9386410713195801 3.221225472 0.010429345236884224 308.86171651580537 +1 512 512 512 90 0.9295392036437988 3.221225472 0.010328213373819986 311.88603056605905 +1 512 512 512 90 0.9415910243988037 3.221225472 0.010462122493320042 307.89406968392115 +1 512 512 512 90 0.9325551986694336 3.221225472 0.010361724429660372 310.87735384848315 +1 512 512 512 90 0.9326050281524658 3.221225472 0.010362278090582953 310.8607435393372 +1 512 512 512 90 0.9398880004882812 3.221225472 
0.010443200005425347 308.45195632818877 +1 512 512 512 90 0.9335529804229736 3.221225472 0.010372810893588595 310.5450880234431 +1 512 512 512 90 0.9386279582977295 3.221225472 0.010429199536641438 308.86603144207805 +1 512 512 512 90 0.931318998336792 3.221225472 0.0103479888704088 311.2900015974548 +1 512 512 512 90 0.9340448379516602 3.221225472 0.010378275977240668 310.38155846540184 +1 512 512 512 90 0.9439029693603516 3.221225472 0.010487810770670573 307.13993057619217 +1 512 512 512 90 0.9443621635437012 3.221225472 0.010492912928263346 306.9905844089699 +1 512 512 512 90 0.9342830181121826 3.221225472 0.010380922423468695 310.3024317682605 +1 512 512 512 90 0.9396378993988037 3.221225472 0.010440421104431153 308.53405622047546 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_1.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_1.txt new file mode 100644 index 0000000..9092620 --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_1.txt @@ -0,0 +1,20 @@ +1 512 512 512 90 0.9406859874725342 3.221225472 0.010452066527472601 308.1902955299041 +1 512 512 512 90 0.9445340633392334 3.221225472 0.010494822925991482 306.9347138790033 +1 512 512 512 90 0.9444530010223389 3.221225472 0.010493922233581542 306.96105805813716 +1 512 512 512 90 0.9364500045776367 3.221225472 0.01040500005086263 309.5843783040581 +1 512 512 512 90 0.936363935470581 3.221225472 0.0104040437274509 309.61283481545246 +1 512 512 512 90 0.9363939762115479 3.221225472 0.010404377513461642 309.6029020315955 +1 512 512 512 90 0.93212890625 3.221225472 0.010356987847222223 311.01952802464115 +1 512 512 512 90 0.9439539909362793 3.221225472 0.01048837767706977 307.12332938223693 +1 512 512 512 90 0.9438271522521973 3.221225472 0.010486968358357747 307.1646029553237 +1 512 512 512 90 0.9510378837585449 3.221225472 0.010567087597317166 304.8356931211419 +1 512 512 512 90 0.9445858001708984 3.221225472 0.01049539777967665 306.91790245793254 +1 512 512 512 90 
0.9474070072174072 3.221225472 0.010526744524637859 306.00395634763606 +1 512 512 512 90 0.9386618137359619 3.221225472 0.010429575708177355 308.8548913331521 +1 512 512 512 90 0.9442949295043945 3.221225472 0.010492165883382162 307.0124422167098 +1 512 512 512 90 0.9362149238586426 3.221225472 0.010402388042873807 309.6621139995553 +1 512 512 512 90 0.9343531131744385 3.221225472 0.01038170125749376 310.2791529157942 +1 512 512 512 90 0.9438211917877197 3.221225472 0.010486902130974664 307.1665427758327 +1 512 512 512 90 0.9379210472106934 3.221225472 0.010421344969007705 309.0988237679188 +1 512 512 512 90 0.950782060623169 3.221225472 0.01056424511803521 304.91771404477777 +1 512 512 512 90 0.941072940826416 3.221225472 0.0104563660091824 308.0635728675945 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_1000.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_1000.txt new file mode 100644 index 0000000..cb63d8e --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_1000.txt @@ -0,0 +1,20 @@ +1000 512 512 512 90 1.0321760177612305 3.221225472 0.011468622419569228 280.8729204044187 +1000 512 512 512 90 1.0510649681091309 3.221225472 0.011678499645657009 275.8252831902004 +1000 512 512 512 90 1.0273869037628174 3.221225472 0.011415410041809081 282.18219583897746 +1000 512 512 512 90 1.0277771949768066 3.221225472 0.011419746610853408 282.075039120266 +1000 512 512 512 90 1.0329020023345947 3.221225472 0.01147668891482883 280.67550631592974 +1000 512 512 512 90 1.0249109268188477 3.221225472 0.011387899186876086 282.86389079666964 +1000 512 512 512 90 1.037930965423584 3.221225472 0.011532566282484266 279.3155827677676 +1000 512 512 512 90 1.0536549091339111 3.221225472 0.011707276768154569 275.14728965511296 +1000 512 512 512 90 1.0262329578399658 3.221225472 0.011402588420444065 282.4994951343295 +1000 512 512 512 90 1.0329978466033936 3.221225472 0.011477753851148817 280.6494645010692 +1000 512 512 512 90 1.0336041450500488 
3.221225472 0.011484490500556098 280.4848392572594 +1000 512 512 512 90 1.0281410217285156 3.221225472 0.01142378913031684 281.9752216409004 +1000 512 512 512 90 1.0298659801483154 3.221225472 0.011442955334981282 281.5029315156607 +1000 512 512 512 90 1.023298978805542 3.221225472 0.01136998865339491 283.3094711170349 +1000 512 512 512 90 1.0275418758392334 3.221225472 0.01141713195376926 282.13963761157567 +1000 512 512 512 90 1.0317161083221436 3.221225472 0.011463512314690484 280.998125493528 +1000 512 512 512 90 1.0305440425872803 3.221225472 0.011450489362080893 281.3177122951021 +1000 512 512 512 90 1.024986982345581 3.221225472 0.011388744248284234 282.8429018840503 +1000 512 512 512 90 1.0292878150939941 3.221225472 0.011436531278822158 281.6610555654207 +1000 512 512 512 90 1.0286939144134521 3.221225472 0.01142993238237169 281.82366826317144 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_125.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_125.txt new file mode 100644 index 0000000..33177ca --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_125.txt @@ -0,0 +1,20 @@ +125 512 512 512 90 1.0315780639648438 3.221225472 0.011461978488498264 281.03572827609116 +125 512 512 512 90 1.0250580310821533 3.221225472 0.011389533678690592 282.823297500476 +125 512 512 512 90 1.0340421199798584 3.221225472 0.011489356888665094 280.3660381703281 +125 512 512 512 90 1.0235490798950195 3.221225472 0.011372767554389106 283.2402453136245 +125 512 512 512 90 1.0225999355316162 3.221225472 0.011362221505906846 283.5031397975643 +125 512 512 512 90 1.03078293800354 3.221225472 0.01145314375559489 281.25251378482204 +125 512 512 512 90 1.028069019317627 3.221225472 0.011422989103529188 281.9949702136008 +125 512 512 512 90 1.0197720527648926 3.221225472 0.011330800586276585 284.28930925687814 +125 512 512 512 90 1.027782917022705 3.221225472 0.011419810189141167 282.07346870467154 +125 512 512 512 90 1.0466759204864502 3.221225472 
0.011629732449849446 276.98190701211706 +125 512 512 512 90 1.0260732173919678 3.221225472 0.01140081352657742 282.5434750327881 +125 512 512 512 90 1.0255498886108398 3.221225472 0.011394998762342665 282.68765439845976 +125 512 512 512 90 1.0263237953186035 3.221225472 0.01140359772576226 282.4744917757681 +125 512 512 512 90 1.0180480480194092 3.221225472 0.011311644977993435 284.7707365521837 +125 512 512 512 90 1.021589994430542 3.221225472 0.011350999938117133 283.7834102335769 +125 512 512 512 90 1.039639949798584 3.221225472 0.011551554997762043 278.85643730424766 +125 512 512 512 90 1.0382180213928223 3.221225472 0.01153575579325358 279.2383550528921 +125 512 512 512 90 1.026151180267334 3.221225472 0.011401679780748155 282.5220085060685 +125 512 512 512 90 1.0319550037384033 3.221225472 0.011466166708204481 280.9330750175723 +125 512 512 512 90 1.0234251022338867 3.221225472 0.011371390024820963 283.2745570215121 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_216.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_216.txt new file mode 100644 index 0000000..7c6fbdd --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_216.txt @@ -0,0 +1,20 @@ +216 512 512 512 90 1.0250859260559082 3.221225472 0.011389843622843425 282.8156012203296 +216 512 512 512 90 1.0291941165924072 3.221225472 0.01143549018436008 281.6866981710637 +216 512 512 512 90 1.021780014038086 3.221225472 0.011353111267089844 283.73063526098076 +216 512 512 512 90 1.0198049545288086 3.221225472 0.011331166161431207 284.28013728757605 +216 512 512 512 90 1.022798776626587 3.221225472 0.011364430851406522 283.448024288988 +216 512 512 512 90 1.024284839630127 3.221225472 0.011380942662556966 283.03678943904674 +216 512 512 512 90 1.0317859649658203 3.221225472 0.011464288499620225 280.9791006312087 +216 512 512 512 90 1.0252671241760254 3.221225472 0.011391856935289171 282.76561848502814 +216 512 512 512 90 1.0273308753967285 3.221225472 
0.011414787504408094 282.19758543521255 +216 512 512 512 90 1.0252220630645752 3.221225472 0.011391356256273058 282.7780467515549 +216 512 512 512 90 1.0219321250915527 3.221225472 0.011354801389906142 283.6884029397036 +216 512 512 512 90 1.0217931270599365 3.221225472 0.011353256967332628 283.72699404837005 +216 512 512 512 90 1.0233349800109863 3.221225472 0.011370388666788737 283.29950421208855 +216 512 512 512 90 1.0295839309692383 3.221225472 0.011439821455213758 281.5800477840421 +216 512 512 512 90 1.023813009262085 3.221225472 0.011375700102912055 283.16722864163773 +216 512 512 512 90 1.0218260288238525 3.221225472 0.01135362254248725 283.71785832632787 +216 512 512 512 90 1.0258259773254395 3.221225472 0.011398066414727105 282.61157241880517 +216 512 512 512 90 1.0240209102630615 3.221225472 0.011378010114034018 283.10973884852086 +216 512 512 512 90 1.0236899852752686 3.221225472 0.011374333169725206 283.2012588284173 +216 512 512 512 90 1.0213518142700195 3.221225472 0.011348353491889106 283.8495887797533 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_2197.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_2197.txt new file mode 100644 index 0000000..9264dcf --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_2197.txt @@ -0,0 +1,20 @@ +2197 512 512 512 90 1.048914909362793 3.221225472 0.011654610104031033 276.3906680057757 +2197 512 512 512 90 1.031451940536499 3.221225472 0.011460577117072211 281.07009263970764 +2197 512 512 512 90 1.0249459743499756 3.221225472 0.011388288603888618 282.85421840293793 +2197 512 512 512 90 1.0257868766784668 3.221225472 0.011397631963094075 282.62234492484396 +2197 512 512 512 90 1.0331628322601318 3.221225472 0.011479587025112576 280.6046476195785 +2197 512 512 512 90 1.0406270027160645 3.221225472 0.011562522252400715 278.59193709496907 +2197 512 512 512 90 1.029627799987793 3.221225472 0.011440308888753255 281.56805059404684 +2197 512 512 512 90 1.030975103378296 
3.221225472 0.01145527892642551 281.2000906035683 +2197 512 512 512 90 1.0281450748443604 3.221225472 0.011423834164937337 281.9741100485127 +2197 512 512 512 90 1.0318272113800049 3.221225472 0.011464746793111165 280.9678687309118 +2197 512 512 512 90 1.0405371189117432 3.221225472 0.011561523543463813 278.61600245765936 +2197 512 512 512 90 1.0330379009246826 3.221225472 0.01147819889916314 280.63858278626407 +2197 512 512 512 90 1.0484299659729004 3.221225472 0.011649221844143338 276.5185104290443 +2197 512 512 512 90 1.0340020656585693 3.221225472 0.01148891184065077 280.3768987592422 +2197 512 512 512 90 1.054955005645752 3.221225472 0.0117217222849528 274.8082059694499 +2197 512 512 512 90 1.0309948921203613 3.221225472 0.011455498801337349 281.1946932964582 +2197 512 512 512 90 1.0436489582061768 3.221225472 0.011596099535624186 277.7852554735432 +2197 512 512 512 90 1.0267071723937988 3.221225472 0.011407857471042209 282.3690145302729 +2197 512 512 512 90 1.029412031173706 3.221225472 0.011437911457485623 281.62706836586375 +2197 512 512 512 90 1.0439260005950928 3.221225472 0.01159917778438992 277.71153541030293 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_27.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_27.txt new file mode 100644 index 0000000..d5d9c44 --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_27.txt @@ -0,0 +1,20 @@ +27 512 512 512 90 1.0259640216827393 3.221225472 0.011399600240919325 282.5735467843233 +27 512 512 512 90 1.0188629627227783 3.221225472 0.011320699585808648 284.54296906156304 +27 512 512 512 90 1.023468017578125 3.221225472 0.011371866861979166 283.2626789511477 +27 512 512 512 90 1.0134360790252686 3.221225472 0.01126040087805854 286.0666779880564 +27 512 512 512 90 1.0192108154296875 3.221225472 0.011324564615885416 284.44585564741794 +27 512 512 512 90 1.0172181129455566 3.221225472 0.011302423477172852 285.00307730512907 +27 512 512 512 90 1.0219600200653076 3.221225472 
0.011355111334058974 283.68065950512766 +27 512 512 512 90 1.0158379077911377 3.221225472 0.011287087864345974 285.3903071114838 +27 512 512 512 90 1.0157220363616943 3.221225472 0.011285800404018826 285.42286383630665 +27 512 512 512 90 1.019331932067871 3.221225472 0.01132591035630968 284.41205789744316 +27 512 512 512 90 1.0130431652069092 3.221225472 0.011256035168965657 286.1776303685808 +27 512 512 512 90 1.0182631015777588 3.221225472 0.011314034461975098 284.71059398184553 +27 512 512 512 90 1.0200870037078857 3.221225472 0.01133430004119873 284.2015351888743 +27 512 512 512 90 1.0138649940490723 3.221225472 0.011265166600545247 285.94565763848436 +27 512 512 512 90 1.0143451690673828 3.221225472 0.011270501878526476 285.8102954702802 +27 512 512 512 90 1.0075318813323975 3.221225472 0.011194798681471083 287.7430460032807 +27 512 512 512 90 1.0179579257965088 3.221225472 0.01131064361996121 284.7959479790459 +27 512 512 512 90 1.0140039920806885 3.221225472 0.01126671102311876 285.90646066897403 +27 512 512 512 90 1.0189950466156006 3.221225472 0.011322167184617784 284.5060861118827 +27 512 512 512 90 1.0126969814300537 3.221225472 0.011252188682556152 286.27545830205867 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_512.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_512.txt new file mode 100644 index 0000000..c64c496 --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_512.txt @@ -0,0 +1,20 @@ +512 512 512 512 90 1.0303359031677246 3.221225472 0.011448176701863606 281.37454163121265 +512 512 512 512 90 1.0290100574493408 3.221225472 0.011433445082770454 281.7370835019973 +512 512 512 512 90 1.0246670246124268 3.221225472 0.011385189162360298 282.93122108585135 +512 512 512 512 90 1.033588171005249 3.221225472 0.011484313011169433 280.489174134064 +512 512 512 512 90 1.0225529670715332 3.221225472 0.011361699634128147 283.5161618182652 +512 512 512 512 90 1.022284984588623 3.221225472 0.0113587220509847 
283.5904829382411 +512 512 512 512 90 1.0221688747406006 3.221225472 0.011357431941562229 283.6226964488344 +512 512 512 512 90 1.0271661281585693 3.221225472 0.01141295697953966 282.24284712321133 +512 512 512 512 90 1.0199179649353027 3.221225472 0.011332421832614474 284.2486380739358 +512 512 512 512 90 1.025109052658081 3.221225472 0.01139010058508979 282.8092208612051 +512 512 512 512 90 1.025683879852295 3.221225472 0.011396487553914389 282.6507252134536 +512 512 512 512 90 1.0211849212646484 3.221225472 0.01134649912516276 283.89597852754366 +512 512 512 512 90 1.0226640701293945 3.221225472 0.011362934112548828 283.4853603914319 +512 512 512 512 90 1.027890920639038 3.221225472 0.011421010229322645 282.04383038986595 +512 512 512 512 90 1.0252108573913574 3.221225472 0.011391231748792861 282.7811375483039 +512 512 512 512 90 1.0222411155700684 3.221225472 0.011358234617445204 283.60265309650265 +512 512 512 512 90 1.0271611213684082 3.221225472 0.011412901348537868 282.2442228866438 +512 512 512 512 90 1.0223748683929443 3.221225472 0.011359720759921603 283.5655506044526 +512 512 512 512 90 1.023226022720337 3.221225472 0.011369178030225966 283.32967110164753 +512 512 512 512 90 1.0279240608215332 3.221225472 0.01142137845357259 282.0347373212561 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_64.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_64.txt new file mode 100644 index 0000000..e18d14d --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_64.txt @@ -0,0 +1,20 @@ +64 512 512 512 90 1.024595022201538 3.221225472 0.011384389135572645 282.95110380008714 +64 512 512 512 90 1.0188770294189453 3.221225472 0.011320855882432725 284.53904063901876 +64 512 512 512 90 1.024925947189331 3.221225472 0.011388066079881456 282.8597454040705 +64 512 512 512 90 1.0149600505828857 3.221225472 0.011277333895365397 285.63714632266186 +64 512 512 512 90 1.0179758071899414 3.221225472 0.01131084230211046 284.7909453568246 +64 
512 512 512 90 1.0129811763763428 3.221225472 0.011255346404181587 286.19514285257804 +64 512 512 512 90 1.0204441547393799 3.221225472 0.01133826838599311 284.10206588330425 +64 512 512 512 90 1.0188329219818115 3.221225472 0.011320365799797906 284.5513589373151 +64 512 512 512 90 1.0134727954864502 3.221225472 0.011260808838738336 286.0563142603624 +64 512 512 512 90 1.0123369693756104 3.221225472 0.011248188548617893 286.3772649326548 +64 512 512 512 90 1.0179169178009033 3.221225472 0.011310187975565593 284.80742132306733 +64 512 512 512 90 1.018286943435669 3.221225472 0.011314299371507432 284.7039278553956 +64 512 512 512 90 1.0203800201416016 3.221225472 0.011337555779351128 284.1199227320897 +64 512 512 512 90 1.018629789352417 3.221225472 0.01131810877058241 284.6081034644661 +64 512 512 512 90 1.0183768272399902 3.221225472 0.011315298080444337 284.67879936517824 +64 512 512 512 90 1.017143964767456 3.221225472 0.01130159960852729 285.0238535764016 +64 512 512 512 90 1.0198941230773926 3.221225472 0.01133215692308214 284.2552828966549 +64 512 512 512 90 1.0157451629638672 3.221225472 0.01128605736626519 285.416365295862 +64 512 512 512 90 1.0269749164581299 3.221225472 0.011410832405090332 282.2953977102514 +64 512 512 512 90 1.0178780555725098 3.221225472 0.011309756173027886 284.8182951708677 diff --git a/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_8.txt b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_8.txt new file mode 100644 index 0000000..98de6a2 --- /dev/null +++ b/paper/data/data_raw_nordma/out_diff3D_pareff_nordma_8.txt @@ -0,0 +1,20 @@ +8 512 512 512 90 0.9805209636688232 3.221225472 0.010894677374098036 295.6696523807511 +8 512 512 512 90 0.9866609573364258 3.221225472 0.010962899525960286 293.829699375799 +8 512 512 512 90 0.9808030128479004 3.221225472 0.010897811253865559 295.5846267623143 +8 512 512 512 90 0.9871678352355957 3.221225472 0.01096853150261773 293.67882758336685 +8 512 512 512 90 0.9833920001983643 3.221225472 
0.010926577779981825 294.8064377394985 +8 512 512 512 90 0.9821388721466064 3.221225472 0.010912654134962294 295.18258639570917 +8 512 512 512 90 0.987868070602417 3.221225472 0.010976311895582411 293.4706577804547 +8 512 512 512 90 0.9853620529174805 3.221225472 0.010948467254638671 294.2170257334627 +8 512 512 512 90 0.980492115020752 3.221225472 0.01089435683356391 295.67835175692784 +8 512 512 512 90 0.9835770130157471 3.221225472 0.010928633477952745 294.75098405472653 +8 512 512 512 90 0.9855170249938965 3.221225472 0.01095018916659885 294.17076024820113 +8 512 512 512 90 0.9810080528259277 3.221225472 0.010900089475843642 295.52284677467605 +8 512 512 512 90 0.9866561889648438 3.221225472 0.01096284654405382 293.8311194137049 +8 512 512 512 90 0.979773998260498 3.221225472 0.010886377758449979 295.8950666120044 +8 512 512 512 90 0.9796319007873535 3.221225472 0.01088479889763726 295.93798675501705 +8 512 512 512 90 0.9798989295959473 3.221225472 0.010887765884399414 295.857341735787 +8 512 512 512 90 0.9837348461151123 3.221225472 0.010930387179056804 294.7036934036552 +8 512 512 512 90 0.9833722114562988 3.221225472 0.010926357905069987 294.8123702322899 +8 512 512 512 90 0.9876101016998291 3.221225472 0.010973445574442546 293.54731384482574 +8 512 512 512 90 0.9800069332122803 3.221225472 0.010888965924580891 295.8247361880676 diff --git a/paper/figure/.gitignore b/paper/figure/.gitignore new file mode 100644 index 0000000..3545968 --- /dev/null +++ b/paper/figure/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +Manifest.toml diff --git a/paper/figure/Project.toml b/paper/figure/Project.toml new file mode 100644 index 0000000..208b475 --- /dev/null +++ b/paper/figure/Project.toml @@ -0,0 +1,3 @@ +[deps] +DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/paper/figure/README.md b/paper/figure/README.md new file mode 100644 index 0000000..57d5eb6 --- /dev/null +++ b/paper/figure/README.md @@ -0,0 +1,11 
@@ +# Parallel efficiency on a weak scaling benchmark + +This folder contains raw data of a weak scaling benchmark from 1 to 2197 +Nvidia P100 GPUs on the Cray XC 50 Piz Daint at CSCS. The raw data are +contained in the `out_diff3D_pareff.txt` script and the plotting script +is `julia_scale.jl`. To reproduce the figure, execute +```bash +julia julia_scale.jl +``` + +The script was created for Julia v1.8.0. diff --git a/paper/figure/julia_c_gpu_par_eff_lin.png b/paper/figure/julia_c_gpu_par_eff_lin.png new file mode 100644 index 0000000..f679005 Binary files /dev/null and b/paper/figure/julia_c_gpu_par_eff_lin.png differ diff --git a/paper/figure/julia_c_scale_daint.jl b/paper/figure/julia_c_scale_daint.jl new file mode 100644 index 0000000..1950193 --- /dev/null +++ b/paper/figure/julia_c_scale_daint.jl @@ -0,0 +1,38 @@ +# Activate the current environment and load all packages +using Pkg +Pkg.activate(@__DIR__) + +ax_log = false # choose between log or linear x-axis scale + +using Plots, Plots.Measures + +# Weak scaling parallel efficiency data on Piz Daint + +# CUDA C +nprocs_C = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,5120] +Teff_C = [0.9500,0.9439,0.9433,0.9424,0.9356,0.9351,0.9346,0.9344,0.9342,0.9307,0.9320,0.9316,0.9313,0.9307] +Teff_lo_C = [0.9499,0.9438,0.9432,0.9423,0.9354,0.9350,0.9345,0.9343,0.9342,0.9307,0.9261,0.9274,0.9277,0.9299] +Teff_hi_C = [0.9501,0.9440,0.9433,0.9425,0.9357,0.9352,0.9347,0.9345,0.9344,0.9308,0.9322,0.9319,0.9316,0.9313] +σs_C = Teff_hi_C .- Teff_lo_C + +# Julia +nprocs_jl = [1,2,4,8,16,32,64,128,256,512,1024] +Teff_jl = [0.9870,0.9718,0.9714,0.9711,0.9564,0.9559,0.9559,0.9549,0.9537,0.9528,0.9521] +Teff_lo_jl = [0.9868,0.9718,0.9714,0.9710,0.9563,0.9559,0.9559,0.9547,0.9532,0.9525,0.9519] +Teff_hi_jl = [0.9870,0.9718,0.9715,0.9711,0.9564,0.9560,0.9560,0.9549,0.9538,0.9531,0.9523] +σs_jl = Teff_hi_jl .- Teff_lo_jl + +default(fontfamily="Computer Modern", linewidth=3, markershape=:circle, markersize=4, + framestyle=:box, 
fillalpha=0.4,margin=5mm) +scalefontsizes(); scalefontsizes(1.3) + +# xtick_lin = (1,512,1024,2048,4096,5120) +xtick_lin = (1,64,128,256,512,1024) +plot(xlabel="Number of GPUs", ylabel="Parallel efficiency", + xticks=(xtick_lin, string.(xtick_lin))) + +plot!(nprocs_C[1:end-3], Teff_C[1:end-3], ribbon=σs_C, label="CUDA C") +plot!(nprocs_jl, Teff_jl, ribbon=σs_jl, label="Julia",foreground_color_legend = nothing, + dpi=150,size=(600, 380)) + +png("julia_c_gpu_par_eff_lin.png") diff --git a/paper/figure/julia_gpu_par_eff.png b/paper/figure/julia_gpu_par_eff.png new file mode 100644 index 0000000..6067b17 Binary files /dev/null and b/paper/figure/julia_gpu_par_eff.png differ diff --git a/paper/figure/julia_scale.jl b/paper/figure/julia_scale.jl new file mode 100644 index 0000000..796ec7f --- /dev/null +++ b/paper/figure/julia_scale.jl @@ -0,0 +1,45 @@ +# Activate the current environment and load all packages +using Pkg +Pkg.activate(@__DIR__) + +using DelimitedFiles, Statistics + +nexp = 20 + +data_r = readdlm("out_diff3D_pareff_noMPI_1.txt") +Teff_ref = mean(data_r[:,end]) + +data = readdlm("out_diff3D_pareff.txt") +# data = readdlm("out_diff3D_pareff_nordma.txt") +nprocs = convert(Vector{Int64},data[1:nexp:end,1]) +Teff = zeros(Float64,length(nprocs),3) +for i ∈ 1:length(nprocs) + local range = nexp*(i-1)+1 : nexp*i + Teff[i,1] = median(data[range,end]./Teff_ref) + # 95% of confidence interval + tmp = sort(data[range,end]./Teff_ref; rev=true) + Teff[i,2] = tmp[5] # 5th rank <= (n-1.96*n^(1/2))/2 = 5.617306764100412 + Teff[i,3] = tmp[16] # 16th rank <= 1+(n+1.96*n^(1/2))/2 = 15.382693235899588 + # previous naive approach + # Teff[i,2] = minimum(data[range,end]./Teff_ref) + # Teff[i,3] = maximum(data[range,end]./Teff_ref) +end +σs = Teff[:,3] .- Teff[:,2] + +# Weak scaling parallel efficiency data on Piz Daint +using Plots, Plots.Measures + +ax_log = false # choose between log or linear x-axis scale + +default(fontfamily="Computer Modern", linewidth=3, 
markershape=:circle, markersize=4, + framestyle=:box, fillalpha=0.4,margin=5mm) +scalefontsizes(); scalefontsizes(1.3) + +# xtick_lin = (1,512,1024,2048,4096,5120) +xtick_lin = (1,64,216,512,1000,2197) +plot(xlabel="Number of GPUs", ylabel="Parallel efficiency", + xticks=(xtick_lin, string.(xtick_lin)), legend=false) +plot!(nprocs[[1,4,6,7,8,9]], Teff[[1,4,6,7,8,9],1], ribbon=σs,dpi=150,size=(600, 380)) + +png("julia_gpu_par_eff.png") +# png("julia_gpu_par_eff_nordma.png") diff --git a/paper/figure/out_diff3D_pareff.txt b/paper/figure/out_diff3D_pareff.txt new file mode 100644 index 0000000..1cf060d --- /dev/null +++ b/paper/figure/out_diff3D_pareff.txt @@ -0,0 +1,180 @@ +1 512 512 512 90 0.9340479373931885 3.221225472 0.010378310415479873 310.3805285295137 +1 512 512 512 90 0.9466681480407715 3.221225472 0.010518534978230795 306.2427874857727 +1 512 512 512 90 0.9535219669342041 3.221225472 0.010594688521491157 304.0415454843996 +1 512 512 512 90 0.9361491203308105 3.221225472 0.010401656892564561 309.68388068083993 +1 512 512 512 90 0.9420769214630127 3.221225472 0.01046752134958903 307.7352664894703 +1 512 512 512 90 0.93709397315979 3.221225472 0.010412155257331 309.37163271091225 +1 512 512 512 90 0.9356751441955566 3.221225472 0.01039639049106174 309.84075432424714 +1 512 512 512 90 0.9484589099884033 3.221225472 0.010538432333204482 305.6645780084924 +1 512 512 512 90 0.943687915802002 3.221225472 0.01048542128668891 307.2099235620889 +1 512 512 512 90 0.9328708648681641 3.221225472 0.01036523183186849 310.7721587177781 +1 512 512 512 90 0.938805103302002 3.221225472 0.010431167814466688 308.80775089559717 +1 512 512 512 90 0.9378070831298828 3.221225472 0.010420078701443142 309.13638603841565 +1 512 512 512 90 0.9503159523010254 3.221225472 0.01055906613667806 305.0672692361235 +1 512 512 512 90 0.9375438690185547 3.221225472 0.010417154100206162 309.22317564028833 +1 512 512 512 90 0.9450640678405762 3.221225472 0.01050071186489529 306.76258080833657 +1 
512 512 512 90 0.9377551078796387 3.221225472 0.010419501198662652 309.1535199797708 +1 512 512 512 90 0.9454519748687744 3.221225472 0.010505021942986383 306.6367199880656 +1 512 512 512 90 0.9402899742126465 3.221225472 0.010447666380140516 308.3200931954602 +1 512 512 512 90 0.9480838775634766 3.221225472 0.010534265306260851 305.7854893862909 +1 512 512 512 90 0.9321670532226562 3.221225472 0.010357411702473958 311.0068002057485 +8 512 512 512 90 0.9687850475311279 3.221225472 0.010764278305901421 299.25141105223855 +8 512 512 512 90 0.9645669460296631 3.221225472 0.0107174105114407 300.5600530614538 +8 512 512 512 90 0.9632339477539062 3.221225472 0.010702599419487847 300.975991508626 +8 512 512 512 90 0.9634730815887451 3.221225472 0.010705256462097168 300.90128932501625 +8 512 512 512 90 0.9681990146636963 3.221225472 0.010757766829596626 299.4325423690916 +8 512 512 512 90 0.9681999683380127 3.221225472 0.01075777742597792 299.4322474288577 +8 512 512 512 90 0.9731669425964355 3.221225472 0.010812966028849284 297.9039667197403 +8 512 512 512 90 0.9741618633270264 3.221225472 0.010824020703633625 297.5997145791336 +8 512 512 512 90 0.9661529064178467 3.221225472 0.01073503229353163 300.06667739052284 +8 512 512 512 90 0.9670360088348389 3.221225472 0.01074484454260932 299.7926549077596 +8 512 512 512 90 0.9747970104217529 3.221225472 0.010831077893575033 297.4058079584879 +8 512 512 512 90 0.9715781211853027 3.221225472 0.010795312457614475 298.3911289874623 +8 512 512 512 90 0.9669270515441895 3.221225472 0.01074363390604655 299.8264367689488 +8 512 512 512 90 0.9685518741607666 3.221225472 0.010761687490675185 299.32345413218286 +8 512 512 512 90 0.9622619152069092 3.221225472 0.010691799057854546 301.28002355539803 +8 512 512 512 90 0.9672901630401611 3.221225472 0.010747668478224013 299.7138847859483 +8 512 512 512 90 0.9680109024047852 3.221225472 0.010755676693386501 299.4907306930006 +8 512 512 512 90 0.9707579612731934 3.221225472 
0.010786199569702148 298.64322935839687 +8 512 512 512 90 0.9707150459289551 3.221225472 0.010785722732543945 298.65643238542947 +8 512 512 512 90 0.9659500122070312 3.221225472 0.010732777913411458 300.12970528112976 +27 512 512 512 90 0.9870240688323975 3.221225472 0.010966934098137749 293.7216037932592 +27 512 512 512 90 0.9850809574127197 3.221225472 0.010945343971252441 294.30098135430325 +27 512 512 512 90 0.9811930656433105 3.221225472 0.010902145173814562 295.4671232719351 +27 512 512 512 90 0.9798460006713867 3.221225472 0.01088717778523763 295.87332323789104 +27 512 512 512 90 0.9858770370483398 3.221225472 0.010954189300537109 294.0633381095629 +27 512 512 512 90 0.9844610691070557 3.221225472 0.010938456323411729 294.4862946616669 +27 512 512 512 90 0.9826450347900391 3.221225472 0.010918278164333767 295.0305371887875 +27 512 512 512 90 0.9798588752746582 3.221225472 0.01088732083638509 295.8694356865799 +27 512 512 512 90 0.9865620136260986 3.221225472 0.010961800151401095 293.8591679750953 +27 512 512 512 90 0.9891538619995117 3.221225472 0.010990598466661242 293.0891781526938 +27 512 512 512 90 0.9829168319702148 3.221225472 0.010921298133002386 294.94895503914324 +27 512 512 512 90 0.9864559173583984 3.221225472 0.010960621303982204 293.8907734025686 +27 512 512 512 90 0.9825069904327393 3.221225472 0.010916744338141547 295.071989617408 +27 512 512 512 90 0.984705924987793 3.221225472 0.01094117694430881 294.41306802697864 +27 512 512 512 90 0.979964017868042 3.221225472 0.010888489087422688 295.83769117432854 +27 512 512 512 90 0.9847640991210938 3.221225472 0.010941823323567709 294.3956758159098 +27 512 512 512 90 0.9835910797119141 3.221225472 0.010928789774576822 294.74676871298226 +27 512 512 512 90 0.9791169166564941 3.221225472 0.010879076851738823 296.09364065528644 +27 512 512 512 90 0.9816069602966309 3.221225472 0.010906744003295898 295.3425395357754 +27 512 512 512 90 0.9833881855010986 3.221225472 0.01092653539445665 294.80758133399 +64 
512 512 512 90 0.9891541004180908 3.221225472 0.010990601115756565 293.08910750858956 +64 512 512 512 90 0.9896070957183838 3.221225472 0.010995634396870931 292.9549451841247 +64 512 512 512 90 0.992056131362915 3.221225472 0.011022845904032389 292.23174305844265 +64 512 512 512 90 0.9884791374206543 3.221225472 0.010983101526896159 293.28923748101994 +64 512 512 512 90 0.994920015335083 3.221225472 0.011054666837056477 291.39055201574166 +64 512 512 512 90 0.9878499507904053 3.221225472 0.010976110564337836 293.47604081777297 +64 512 512 512 90 0.9888169765472412 3.221225472 0.010986855294969346 293.18903230435126 +64 512 512 512 90 0.9844260215759277 3.221225472 0.010938066906399197 294.49677896150524 +64 512 512 512 90 0.9901220798492432 3.221225472 0.011001356442769368 292.80257291519246 +64 512 512 512 90 0.9839041233062744 3.221225472 0.010932268036736383 294.65299068551144 +64 512 512 512 90 0.9850249290466309 3.221225472 0.010944721433851454 294.3177212383786 +64 512 512 512 90 0.9897830486297607 3.221225472 0.010997589429219563 292.90286682657074 +64 512 512 512 90 0.9830901622772217 3.221225472 0.010923224025302464 294.8969520846941 +64 512 512 512 90 0.9945449829101562 3.221225472 0.011050499810112847 291.50043232000246 +64 512 512 512 90 0.9882810115814209 3.221225472 0.010980900128682454 293.34803470127724 +64 512 512 512 90 0.9904758930206299 3.221225472 0.01100528770022922 292.69797934795537 +64 512 512 512 90 0.9823501110076904 3.221225472 0.010915001233418783 295.119112047141 +64 512 512 512 90 0.9806761741638184 3.221225472 0.010896401935153538 295.6228570834755 +64 512 512 512 90 0.9884629249572754 3.221225472 0.01098292138841417 293.2940479204426 +64 512 512 512 90 0.9870820045471191 3.221225472 0.010967577828301324 293.7043641201959 +125 512 512 512 90 0.9970932006835938 3.221225472 0.01107881334092882 290.7554602531051 +125 512 512 512 90 1.0150079727172852 3.221225472 0.011277866363525391 285.6236603776412 +125 512 512 512 90 
1.0006930828094482 3.221225472 0.011118812031216091 289.70949980595066 +125 512 512 512 90 1.0041658878326416 3.221225472 0.011157398753696018 288.70756913056744 +125 512 512 512 90 1.0230579376220703 3.221225472 0.011367310418023004 283.37622124691075 +125 512 512 512 90 1.0218260288238525 3.221225472 0.01135362254248725 283.71785832632787 +125 512 512 512 90 1.0313291549682617 3.221225472 0.011459212832980686 281.1035556237346 +125 512 512 512 90 1.0081279277801514 3.221225472 0.011201421419779459 287.57292055024044 +125 512 512 512 90 1.0158588886260986 3.221225472 0.01128732098473443 285.38441286081576 +125 512 512 512 90 0.9918308258056641 3.221225472 0.011020342508951822 292.29812679446206 +125 512 512 512 90 1.0006229877471924 3.221225472 0.011118033197191027 289.7297943681121 +125 512 512 512 90 1.0198469161987305 3.221225472 0.011331632402208117 284.26844056221785 +125 512 512 512 90 1.0079660415649414 3.221225472 0.011199622684054904 287.6191067209893 +125 512 512 512 90 0.996452808380127 3.221225472 0.0110716978708903 290.94232064164646 +125 512 512 512 90 1.016430139541626 3.221225472 0.011293668217129177 285.2240219979499 +125 512 512 512 90 1.0135829448699951 3.221225472 0.011262032720777724 286.0252275823215 +125 512 512 512 90 0.9945950508117676 3.221225472 0.011051056120130751 291.4857581921218 +125 512 512 512 90 1.0017030239105225 3.221225472 0.011130033599005804 289.41740771454073 +125 512 512 512 90 1.0160350799560547 3.221225472 0.011289278666178385 285.3349241568895 +125 512 512 512 90 0.9946188926696777 3.221225472 0.011051321029663086 291.47877103143054 +216 512 512 512 90 1.0110080242156982 3.221225472 0.011233422491285536 286.75370079767805 +216 512 512 512 90 1.003983974456787 3.221225472 0.011155377493964301 288.7598804919751 +216 512 512 512 90 0.9931049346923828 3.221225472 0.011034499274359809 291.92312146732064 +216 512 512 512 90 0.9962389469146729 3.221225472 0.011069321632385254 291.00477689398207 +216 512 512 512 90 
0.9913389682769775 3.221225472 0.01101487742529975 292.4431518957498 +216 512 512 512 90 0.9879758358001709 3.221225472 0.010977509286668565 293.4386469535451 +216 512 512 512 90 0.9968409538269043 3.221225472 0.011076010598076714 290.82903482950326 +216 512 512 512 90 0.9944860935211182 3.221225472 0.01104984548356798 291.5176937804447 +216 512 512 512 90 0.9922730922698975 3.221225472 0.01102525658077664 292.1678464713872 +216 512 512 512 90 0.9938778877258301 3.221225472 0.011043087641398112 291.69608868486495 +216 512 512 512 90 1.0026028156280518 3.221225472 0.01114003128475613 289.15766838176495 +216 512 512 512 90 1.000662088394165 3.221225472 0.011118467648824055 289.71847324129175 +216 512 512 512 90 0.9929680824279785 3.221225472 0.011032978693644205 291.9633547244734 +216 512 512 512 90 0.9897878170013428 3.221225472 0.010997642411126031 292.901455746658 +216 512 512 512 90 0.9945991039276123 3.221225472 0.011051101154751247 291.48457035117127 +216 512 512 512 90 0.994642972946167 3.221225472 0.011051588588290744 291.4717143391419 +216 512 512 512 90 0.9960529804229736 3.221225472 0.01106725533803304 291.0591084792394 +216 512 512 512 90 0.991070032119751 3.221225472 0.01101188924577501 292.5225090904274 +216 512 512 512 90 0.996103048324585 3.221225472 0.011067811648050945 291.0444787490815 +216 512 512 512 90 0.9962210655212402 3.221225472 0.011069122950236003 291.01000020343264 +512 512 512 512 90 0.9991440773010254 3.221225472 0.011101600858900282 290.1586458512879 +512 512 512 512 90 0.9981718063354492 3.221225472 0.011090797848171657 290.44127537957297 +512 512 512 512 90 0.9976179599761963 3.221225472 0.011084643999735515 290.6025193120194 +512 512 512 512 90 1.0039019584655762 3.221225472 0.011154466205173068 288.7834713691726 +512 512 512 512 90 1.0002031326293945 3.221225472 0.011113368140326606 289.8514142000998 +512 512 512 512 90 1.0193839073181152 3.221225472 0.011326487859090168 284.3975566013412 +512 512 512 512 90 1.0015339851379395 
3.221225472 0.01112815539042155 289.46625554605737 +512 512 512 512 90 1.0001678466796875 3.221225472 0.01111297607421875 289.86164016612935 +512 512 512 512 90 0.998852014541626 3.221225472 0.011098355717129178 290.24348778336304 +512 512 512 512 90 0.9998509883880615 3.221225472 0.011109455426534017 289.953498918261 +512 512 512 512 90 0.9953110218048096 3.221225472 0.011059011353386774 291.276079666336 +512 512 512 512 90 0.9934980869293213 3.221225472 0.011038867632548015 291.80760012940476 +512 512 512 512 90 0.9922981262207031 3.221225472 0.01102553473578559 292.16047558626474 +512 512 512 512 90 0.9951930046081543 3.221225472 0.011057700051201715 291.3106212941567 +512 512 512 512 90 0.9982829093933105 3.221225472 0.01109203232659234 290.40895096179503 +512 512 512 512 90 0.9964919090270996 3.221225472 0.01107213232252333 290.93090456002477 +512 512 512 512 90 0.9925789833068848 3.221225472 0.011028655370076497 292.0778067596519 +512 512 512 512 90 1.0022060871124268 3.221225472 0.011135623190138074 289.2721329555027 +512 512 512 512 90 0.991750955581665 3.221225472 0.0110194550620185 292.3216668946557 +512 512 512 512 90 0.9951310157775879 3.221225472 0.011057011286417643 291.3287676532384 +1000 512 512 512 90 1.0334599018096924 3.221225472 0.01148288779788547 280.5239874061276 +1000 512 512 512 90 1.0170190334320068 3.221225472 0.011300211482577854 285.0588661076244 +1000 512 512 512 90 1.0364751815795898 3.221225472 0.011516390906439886 279.7078961776743 +1000 512 512 512 90 1.0034151077270508 3.221225472 0.011149056752522786 288.92358730446927 +1000 512 512 512 90 1.0056180953979492 3.221225472 0.011173534393310546 288.2906481165447 +1000 512 512 512 90 1.0111370086669922 3.221225472 0.011234855651855469 286.71712141383904 +1000 512 512 512 90 1.0050580501556396 3.221225472 0.011167311668395996 288.4512913807372 +1000 512 512 512 90 1.020353078842163 3.221225472 0.01133725643157959 284.12742460577783 +1000 512 512 512 90 0.9975872039794922 3.221225472 
0.011084302266438802 290.6114786993196 +1000 512 512 512 90 1.010585069656372 3.221225472 0.011228722996181911 286.87371423226926 +1000 512 512 512 90 1.0133888721466064 3.221225472 0.011259876357184517 286.0800038842925 +1000 512 512 512 90 0.9983890056610107 3.221225472 0.011093211174011231 290.3780899390583 +1000 512 512 512 90 0.9993908405303955 3.221225472 0.01110434267255995 290.0870017241094 +1000 512 512 512 90 0.9988658428192139 3.221225472 0.011098509364657932 290.23946965866094 +1000 512 512 512 90 1.0087361335754395 3.221225472 0.011208179261949327 287.3995317808438 +1000 512 512 512 90 1.0233879089355469 3.221225472 0.011370976765950521 283.2848521549795 +1000 512 512 512 90 1.0156311988830566 3.221225472 0.01128479109870063 285.44839189543376 +1000 512 512 512 90 0.9957358837127686 3.221225472 0.011063732041252983 291.1517976022123 +1000 512 512 512 90 0.9934961795806885 3.221225472 0.011038846439785428 291.8081603518179 +1000 512 512 512 90 0.9957840442657471 3.221225472 0.011064267158508301 291.1377162040879 +2197 512 512 512 90 1.006438970565796 3.221225472 0.011182655228508843 288.05551152000737 +2197 512 512 512 90 1.000309944152832 3.221225472 0.011114554935031468 289.8204643217124 +2197 512 512 512 90 1.021346092224121 3.221225472 0.011348289913601345 283.8511790344061 +2197 512 512 512 90 1.0026910305023193 3.221225472 0.011141011450025771 289.13222883300676 +2197 512 512 512 90 1.0015809535980225 3.221225472 0.01112867726220025 289.4526812221646 +2197 512 512 512 90 1.0017869472503662 3.221225472 0.011130966080559624 289.3931621645952 +2197 512 512 512 90 0.9995851516723633 3.221225472 0.01110650168524848 290.0306111940173 +2197 512 512 512 90 1.015455961227417 3.221225472 0.011282844013637966 285.4976518426022 +2197 512 512 512 90 1.037642002105713 3.221225472 0.011529355578952365 279.3933667793688 +2197 512 512 512 90 1.0024449825286865 3.221225472 0.011138277583652073 289.2031957192262 +2197 512 512 512 90 1.0173060894012451 3.221225472 
0.011303400993347168 284.9784302880092 +2197 512 512 512 90 1.056675910949707 3.221225472 0.011740843454996744 274.36065256700874 +2197 512 512 512 90 1.0010077953338623 3.221225472 0.011122308837042915 289.6184163913602 +2197 512 512 512 90 1.015197992324829 3.221225472 0.0112799776924981 285.57019879058083 +2197 512 512 512 90 0.9989988803863525 3.221225472 0.01109998755984836 290.2008182110076 +2197 512 512 512 90 0.9983718395233154 3.221225472 0.01109302043914795 290.3830827384125 +2197 512 512 512 90 1.011354923248291 3.221225472 0.011237276924981011 286.65534306083174 +2197 512 512 512 90 1.0115690231323242 3.221225472 0.01123965581258138 286.59467208900145 +2197 512 512 512 90 0.9956479072570801 3.221225472 0.011062754525078667 291.17752406940383 +2197 512 512 512 90 1.017198085784912 3.221225472 0.01130220095316569 285.00868860394405 diff --git a/paper/figure/out_diff3D_pareff_noMPI_1.txt b/paper/figure/out_diff3D_pareff_noMPI_1.txt new file mode 100644 index 0000000..bfb86ef --- /dev/null +++ b/paper/figure/out_diff3D_pareff_noMPI_1.txt @@ -0,0 +1,20 @@ +1 512 512 512 90 0.9372940063476562 3.221225472 0.010414377848307291 309.30560797000123 +1 512 512 512 90 0.9418549537658691 3.221225472 0.01046505504184299 307.80779070156836 +1 512 512 512 90 0.939877986907959 3.221225472 0.010443088743421767 308.4552426147954 +1 512 512 512 90 0.9386570453643799 3.221225472 0.010429522726270887 308.8564603139573 +1 512 512 512 90 0.9374549388885498 3.221225472 0.010416165987650553 309.25250959125435 +1 512 512 512 90 0.9442510604858398 3.221225472 0.010491678449842666 307.02670572679494 +1 512 512 512 90 0.9386410713195801 3.221225472 0.010429345236884224 308.86171651580537 +1 512 512 512 90 0.9295392036437988 3.221225472 0.010328213373819986 311.88603056605905 +1 512 512 512 90 0.9415910243988037 3.221225472 0.010462122493320042 307.89406968392115 +1 512 512 512 90 0.9325551986694336 3.221225472 0.010361724429660372 310.87735384848315 +1 512 512 512 90 
0.9326050281524658 3.221225472 0.010362278090582953 310.8607435393372 +1 512 512 512 90 0.9398880004882812 3.221225472 0.010443200005425347 308.45195632818877 +1 512 512 512 90 0.9335529804229736 3.221225472 0.010372810893588595 310.5450880234431 +1 512 512 512 90 0.9386279582977295 3.221225472 0.010429199536641438 308.86603144207805 +1 512 512 512 90 0.931318998336792 3.221225472 0.0103479888704088 311.2900015974548 +1 512 512 512 90 0.9340448379516602 3.221225472 0.010378275977240668 310.38155846540184 +1 512 512 512 90 0.9439029693603516 3.221225472 0.010487810770670573 307.13993057619217 +1 512 512 512 90 0.9443621635437012 3.221225472 0.010492912928263346 306.9905844089699 +1 512 512 512 90 0.9342830181121826 3.221225472 0.010380922423468695 310.3024317682605 +1 512 512 512 90 0.9396378993988037 3.221225472 0.010440421104431153 308.53405622047546 diff --git a/paper/figure/out_diff3D_pareff_nordma.txt b/paper/figure/out_diff3D_pareff_nordma.txt new file mode 100644 index 0000000..d7346c7 --- /dev/null +++ b/paper/figure/out_diff3D_pareff_nordma.txt @@ -0,0 +1,180 @@ +1 512 512 512 90 0.9372940063476562 3.221225472 0.010414377848307291 309.30560797000123 +1 512 512 512 90 0.9418549537658691 3.221225472 0.01046505504184299 307.80779070156836 +1 512 512 512 90 0.939877986907959 3.221225472 0.010443088743421767 308.4552426147954 +1 512 512 512 90 0.9386570453643799 3.221225472 0.010429522726270887 308.8564603139573 +1 512 512 512 90 0.9374549388885498 3.221225472 0.010416165987650553 309.25250959125435 +1 512 512 512 90 0.9442510604858398 3.221225472 0.010491678449842666 307.02670572679494 +1 512 512 512 90 0.9386410713195801 3.221225472 0.010429345236884224 308.86171651580537 +1 512 512 512 90 0.9295392036437988 3.221225472 0.010328213373819986 311.88603056605905 +1 512 512 512 90 0.9415910243988037 3.221225472 0.010462122493320042 307.89406968392115 +1 512 512 512 90 0.9325551986694336 3.221225472 0.010361724429660372 310.87735384848315 +1 512 512 512 90 
0.9326050281524658 3.221225472 0.010362278090582953 310.8607435393372 +1 512 512 512 90 0.9398880004882812 3.221225472 0.010443200005425347 308.45195632818877 +1 512 512 512 90 0.9335529804229736 3.221225472 0.010372810893588595 310.5450880234431 +1 512 512 512 90 0.9386279582977295 3.221225472 0.010429199536641438 308.86603144207805 +1 512 512 512 90 0.931318998336792 3.221225472 0.0103479888704088 311.2900015974548 +1 512 512 512 90 0.9340448379516602 3.221225472 0.010378275977240668 310.38155846540184 +1 512 512 512 90 0.9439029693603516 3.221225472 0.010487810770670573 307.13993057619217 +1 512 512 512 90 0.9443621635437012 3.221225472 0.010492912928263346 306.9905844089699 +1 512 512 512 90 0.9342830181121826 3.221225472 0.010380922423468695 310.3024317682605 +1 512 512 512 90 0.9396378993988037 3.221225472 0.010440421104431153 308.53405622047546 +8 512 512 512 90 0.9805209636688232 3.221225472 0.010894677374098036 295.6696523807511 +8 512 512 512 90 0.9866609573364258 3.221225472 0.010962899525960286 293.829699375799 +8 512 512 512 90 0.9808030128479004 3.221225472 0.010897811253865559 295.5846267623143 +8 512 512 512 90 0.9871678352355957 3.221225472 0.01096853150261773 293.67882758336685 +8 512 512 512 90 0.9833920001983643 3.221225472 0.010926577779981825 294.8064377394985 +8 512 512 512 90 0.9821388721466064 3.221225472 0.010912654134962294 295.18258639570917 +8 512 512 512 90 0.987868070602417 3.221225472 0.010976311895582411 293.4706577804547 +8 512 512 512 90 0.9853620529174805 3.221225472 0.010948467254638671 294.2170257334627 +8 512 512 512 90 0.980492115020752 3.221225472 0.01089435683356391 295.67835175692784 +8 512 512 512 90 0.9835770130157471 3.221225472 0.010928633477952745 294.75098405472653 +8 512 512 512 90 0.9855170249938965 3.221225472 0.01095018916659885 294.17076024820113 +8 512 512 512 90 0.9810080528259277 3.221225472 0.010900089475843642 295.52284677467605 +8 512 512 512 90 0.9866561889648438 3.221225472 0.01096284654405382 
293.8311194137049 +8 512 512 512 90 0.979773998260498 3.221225472 0.010886377758449979 295.8950666120044 +8 512 512 512 90 0.9796319007873535 3.221225472 0.01088479889763726 295.93798675501705 +8 512 512 512 90 0.9798989295959473 3.221225472 0.010887765884399414 295.857341735787 +8 512 512 512 90 0.9837348461151123 3.221225472 0.010930387179056804 294.7036934036552 +8 512 512 512 90 0.9833722114562988 3.221225472 0.010926357905069987 294.8123702322899 +8 512 512 512 90 0.9876101016998291 3.221225472 0.010973445574442546 293.54731384482574 +8 512 512 512 90 0.9800069332122803 3.221225472 0.010888965924580891 295.8247361880676 +27 512 512 512 90 1.0259640216827393 3.221225472 0.011399600240919325 282.5735467843233 +27 512 512 512 90 1.0188629627227783 3.221225472 0.011320699585808648 284.54296906156304 +27 512 512 512 90 1.023468017578125 3.221225472 0.011371866861979166 283.2626789511477 +27 512 512 512 90 1.0134360790252686 3.221225472 0.01126040087805854 286.0666779880564 +27 512 512 512 90 1.0192108154296875 3.221225472 0.011324564615885416 284.44585564741794 +27 512 512 512 90 1.0172181129455566 3.221225472 0.011302423477172852 285.00307730512907 +27 512 512 512 90 1.0219600200653076 3.221225472 0.011355111334058974 283.68065950512766 +27 512 512 512 90 1.0158379077911377 3.221225472 0.011287087864345974 285.3903071114838 +27 512 512 512 90 1.0157220363616943 3.221225472 0.011285800404018826 285.42286383630665 +27 512 512 512 90 1.019331932067871 3.221225472 0.01132591035630968 284.41205789744316 +27 512 512 512 90 1.0130431652069092 3.221225472 0.011256035168965657 286.1776303685808 +27 512 512 512 90 1.0182631015777588 3.221225472 0.011314034461975098 284.71059398184553 +27 512 512 512 90 1.0200870037078857 3.221225472 0.01133430004119873 284.2015351888743 +27 512 512 512 90 1.0138649940490723 3.221225472 0.011265166600545247 285.94565763848436 +27 512 512 512 90 1.0143451690673828 3.221225472 0.011270501878526476 285.8102954702802 +27 512 512 512 90 
1.0075318813323975 3.221225472 0.011194798681471083 287.7430460032807 +27 512 512 512 90 1.0179579257965088 3.221225472 0.01131064361996121 284.7959479790459 +27 512 512 512 90 1.0140039920806885 3.221225472 0.01126671102311876 285.90646066897403 +27 512 512 512 90 1.0189950466156006 3.221225472 0.011322167184617784 284.5060861118827 +27 512 512 512 90 1.0126969814300537 3.221225472 0.011252188682556152 286.27545830205867 +64 512 512 512 90 1.024595022201538 3.221225472 0.011384389135572645 282.95110380008714 +64 512 512 512 90 1.0188770294189453 3.221225472 0.011320855882432725 284.53904063901876 +64 512 512 512 90 1.024925947189331 3.221225472 0.011388066079881456 282.8597454040705 +64 512 512 512 90 1.0149600505828857 3.221225472 0.011277333895365397 285.63714632266186 +64 512 512 512 90 1.0179758071899414 3.221225472 0.01131084230211046 284.7909453568246 +64 512 512 512 90 1.0129811763763428 3.221225472 0.011255346404181587 286.19514285257804 +64 512 512 512 90 1.0204441547393799 3.221225472 0.01133826838599311 284.10206588330425 +64 512 512 512 90 1.0188329219818115 3.221225472 0.011320365799797906 284.5513589373151 +64 512 512 512 90 1.0134727954864502 3.221225472 0.011260808838738336 286.0563142603624 +64 512 512 512 90 1.0123369693756104 3.221225472 0.011248188548617893 286.3772649326548 +64 512 512 512 90 1.0179169178009033 3.221225472 0.011310187975565593 284.80742132306733 +64 512 512 512 90 1.018286943435669 3.221225472 0.011314299371507432 284.7039278553956 +64 512 512 512 90 1.0203800201416016 3.221225472 0.011337555779351128 284.1199227320897 +64 512 512 512 90 1.018629789352417 3.221225472 0.01131810877058241 284.6081034644661 +64 512 512 512 90 1.0183768272399902 3.221225472 0.011315298080444337 284.67879936517824 +64 512 512 512 90 1.017143964767456 3.221225472 0.01130159960852729 285.0238535764016 +64 512 512 512 90 1.0198941230773926 3.221225472 0.01133215692308214 284.2552828966549 +64 512 512 512 90 1.0157451629638672 3.221225472 
0.01128605736626519 285.416365295862 +64 512 512 512 90 1.0269749164581299 3.221225472 0.011410832405090332 282.2953977102514 +64 512 512 512 90 1.0178780555725098 3.221225472 0.011309756173027886 284.8182951708677 +125 512 512 512 90 1.0315780639648438 3.221225472 0.011461978488498264 281.03572827609116 +125 512 512 512 90 1.0250580310821533 3.221225472 0.011389533678690592 282.823297500476 +125 512 512 512 90 1.0340421199798584 3.221225472 0.011489356888665094 280.3660381703281 +125 512 512 512 90 1.0235490798950195 3.221225472 0.011372767554389106 283.2402453136245 +125 512 512 512 90 1.0225999355316162 3.221225472 0.011362221505906846 283.5031397975643 +125 512 512 512 90 1.03078293800354 3.221225472 0.01145314375559489 281.25251378482204 +125 512 512 512 90 1.028069019317627 3.221225472 0.011422989103529188 281.9949702136008 +125 512 512 512 90 1.0197720527648926 3.221225472 0.011330800586276585 284.28930925687814 +125 512 512 512 90 1.027782917022705 3.221225472 0.011419810189141167 282.07346870467154 +125 512 512 512 90 1.0466759204864502 3.221225472 0.011629732449849446 276.98190701211706 +125 512 512 512 90 1.0260732173919678 3.221225472 0.01140081352657742 282.5434750327881 +125 512 512 512 90 1.0255498886108398 3.221225472 0.011394998762342665 282.68765439845976 +125 512 512 512 90 1.0263237953186035 3.221225472 0.01140359772576226 282.4744917757681 +125 512 512 512 90 1.0180480480194092 3.221225472 0.011311644977993435 284.7707365521837 +125 512 512 512 90 1.021589994430542 3.221225472 0.011350999938117133 283.7834102335769 +125 512 512 512 90 1.039639949798584 3.221225472 0.011551554997762043 278.85643730424766 +125 512 512 512 90 1.0382180213928223 3.221225472 0.01153575579325358 279.2383550528921 +125 512 512 512 90 1.026151180267334 3.221225472 0.011401679780748155 282.5220085060685 +125 512 512 512 90 1.0319550037384033 3.221225472 0.011466166708204481 280.9330750175723 +125 512 512 512 90 1.0234251022338867 3.221225472 0.011371390024820963 
283.2745570215121 +216 512 512 512 90 1.0250859260559082 3.221225472 0.011389843622843425 282.8156012203296 +216 512 512 512 90 1.0291941165924072 3.221225472 0.01143549018436008 281.6866981710637 +216 512 512 512 90 1.021780014038086 3.221225472 0.011353111267089844 283.73063526098076 +216 512 512 512 90 1.0198049545288086 3.221225472 0.011331166161431207 284.28013728757605 +216 512 512 512 90 1.022798776626587 3.221225472 0.011364430851406522 283.448024288988 +216 512 512 512 90 1.024284839630127 3.221225472 0.011380942662556966 283.03678943904674 +216 512 512 512 90 1.0317859649658203 3.221225472 0.011464288499620225 280.9791006312087 +216 512 512 512 90 1.0252671241760254 3.221225472 0.011391856935289171 282.76561848502814 +216 512 512 512 90 1.0273308753967285 3.221225472 0.011414787504408094 282.19758543521255 +216 512 512 512 90 1.0252220630645752 3.221225472 0.011391356256273058 282.7780467515549 +216 512 512 512 90 1.0219321250915527 3.221225472 0.011354801389906142 283.6884029397036 +216 512 512 512 90 1.0217931270599365 3.221225472 0.011353256967332628 283.72699404837005 +216 512 512 512 90 1.0233349800109863 3.221225472 0.011370388666788737 283.29950421208855 +216 512 512 512 90 1.0295839309692383 3.221225472 0.011439821455213758 281.5800477840421 +216 512 512 512 90 1.023813009262085 3.221225472 0.011375700102912055 283.16722864163773 +216 512 512 512 90 1.0218260288238525 3.221225472 0.01135362254248725 283.71785832632787 +216 512 512 512 90 1.0258259773254395 3.221225472 0.011398066414727105 282.61157241880517 +216 512 512 512 90 1.0240209102630615 3.221225472 0.011378010114034018 283.10973884852086 +216 512 512 512 90 1.0236899852752686 3.221225472 0.011374333169725206 283.2012588284173 +216 512 512 512 90 1.0213518142700195 3.221225472 0.011348353491889106 283.8495887797533 +512 512 512 512 90 1.0303359031677246 3.221225472 0.011448176701863606 281.37454163121265 +512 512 512 512 90 1.0290100574493408 3.221225472 0.011433445082770454 
281.7370835019973 +512 512 512 512 90 1.0246670246124268 3.221225472 0.011385189162360298 282.93122108585135 +512 512 512 512 90 1.033588171005249 3.221225472 0.011484313011169433 280.489174134064 +512 512 512 512 90 1.0225529670715332 3.221225472 0.011361699634128147 283.5161618182652 +512 512 512 512 90 1.022284984588623 3.221225472 0.0113587220509847 283.5904829382411 +512 512 512 512 90 1.0221688747406006 3.221225472 0.011357431941562229 283.6226964488344 +512 512 512 512 90 1.0271661281585693 3.221225472 0.01141295697953966 282.24284712321133 +512 512 512 512 90 1.0199179649353027 3.221225472 0.011332421832614474 284.2486380739358 +512 512 512 512 90 1.025109052658081 3.221225472 0.01139010058508979 282.8092208612051 +512 512 512 512 90 1.025683879852295 3.221225472 0.011396487553914389 282.6507252134536 +512 512 512 512 90 1.0211849212646484 3.221225472 0.01134649912516276 283.89597852754366 +512 512 512 512 90 1.0226640701293945 3.221225472 0.011362934112548828 283.4853603914319 +512 512 512 512 90 1.027890920639038 3.221225472 0.011421010229322645 282.04383038986595 +512 512 512 512 90 1.0252108573913574 3.221225472 0.011391231748792861 282.7811375483039 +512 512 512 512 90 1.0222411155700684 3.221225472 0.011358234617445204 283.60265309650265 +512 512 512 512 90 1.0271611213684082 3.221225472 0.011412901348537868 282.2442228866438 +512 512 512 512 90 1.0223748683929443 3.221225472 0.011359720759921603 283.5655506044526 +512 512 512 512 90 1.023226022720337 3.221225472 0.011369178030225966 283.32967110164753 +512 512 512 512 90 1.0279240608215332 3.221225472 0.01142137845357259 282.0347373212561 +1000 512 512 512 90 1.0321760177612305 3.221225472 0.011468622419569228 280.8729204044187 +1000 512 512 512 90 1.0510649681091309 3.221225472 0.011678499645657009 275.8252831902004 +1000 512 512 512 90 1.0273869037628174 3.221225472 0.011415410041809081 282.18219583897746 +1000 512 512 512 90 1.0277771949768066 3.221225472 0.011419746610853408 282.075039120266 
+1000 512 512 512 90 1.0329020023345947 3.221225472 0.01147668891482883 280.67550631592974 +1000 512 512 512 90 1.0249109268188477 3.221225472 0.011387899186876086 282.86389079666964 +1000 512 512 512 90 1.037930965423584 3.221225472 0.011532566282484266 279.3155827677676 +1000 512 512 512 90 1.0536549091339111 3.221225472 0.011707276768154569 275.14728965511296 +1000 512 512 512 90 1.0262329578399658 3.221225472 0.011402588420444065 282.4994951343295 +1000 512 512 512 90 1.0329978466033936 3.221225472 0.011477753851148817 280.6494645010692 +1000 512 512 512 90 1.0336041450500488 3.221225472 0.011484490500556098 280.4848392572594 +1000 512 512 512 90 1.0281410217285156 3.221225472 0.01142378913031684 281.9752216409004 +1000 512 512 512 90 1.0298659801483154 3.221225472 0.011442955334981282 281.5029315156607 +1000 512 512 512 90 1.023298978805542 3.221225472 0.01136998865339491 283.3094711170349 +1000 512 512 512 90 1.0275418758392334 3.221225472 0.01141713195376926 282.13963761157567 +1000 512 512 512 90 1.0317161083221436 3.221225472 0.011463512314690484 280.998125493528 +1000 512 512 512 90 1.0305440425872803 3.221225472 0.011450489362080893 281.3177122951021 +1000 512 512 512 90 1.024986982345581 3.221225472 0.011388744248284234 282.8429018840503 +1000 512 512 512 90 1.0292878150939941 3.221225472 0.011436531278822158 281.6610555654207 +1000 512 512 512 90 1.0286939144134521 3.221225472 0.01142993238237169 281.82366826317144 +2197 512 512 512 90 1.048914909362793 3.221225472 0.011654610104031033 276.3906680057757 +2197 512 512 512 90 1.031451940536499 3.221225472 0.011460577117072211 281.07009263970764 +2197 512 512 512 90 1.0249459743499756 3.221225472 0.011388288603888618 282.85421840293793 +2197 512 512 512 90 1.0257868766784668 3.221225472 0.011397631963094075 282.62234492484396 +2197 512 512 512 90 1.0331628322601318 3.221225472 0.011479587025112576 280.6046476195785 +2197 512 512 512 90 1.0406270027160645 3.221225472 0.011562522252400715 278.59193709496907 
+2197 512 512 512 90 1.029627799987793 3.221225472 0.011440308888753255 281.56805059404684 +2197 512 512 512 90 1.030975103378296 3.221225472 0.01145527892642551 281.2000906035683 +2197 512 512 512 90 1.0281450748443604 3.221225472 0.011423834164937337 281.9741100485127 +2197 512 512 512 90 1.0318272113800049 3.221225472 0.011464746793111165 280.9678687309118 +2197 512 512 512 90 1.0405371189117432 3.221225472 0.011561523543463813 278.61600245765936 +2197 512 512 512 90 1.0330379009246826 3.221225472 0.01147819889916314 280.63858278626407 +2197 512 512 512 90 1.0484299659729004 3.221225472 0.011649221844143338 276.5185104290443 +2197 512 512 512 90 1.0340020656585693 3.221225472 0.01148891184065077 280.3768987592422 +2197 512 512 512 90 1.054955005645752 3.221225472 0.0117217222849528 274.8082059694499 +2197 512 512 512 90 1.0309948921203613 3.221225472 0.011455498801337349 281.1946932964582 +2197 512 512 512 90 1.0436489582061768 3.221225472 0.011596099535624186 277.7852554735432 +2197 512 512 512 90 1.0267071723937988 3.221225472 0.011407857471042209 282.3690145302729 +2197 512 512 512 90 1.029412031173706 3.221225472 0.011437911457485623 281.62706836586375 +2197 512 512 512 90 1.0439260005950928 3.221225472 0.01159917778438992 277.71153541030293 diff --git a/paper/header.tex b/paper/header.tex new file mode 100644 index 0000000..a046333 --- /dev/null +++ b/paper/header.tex @@ -0,0 +1,20 @@ +% **************GENERATED FILE, DO NOT EDIT************** + +\title{Distributed Parallelization of xPU Stencil Computations in Julia} + +\author[1]{Samuel Omlin} +\author[2, 3]{Ludovic R\"ass} +\author[2, 3]{Ivan Utkin} +\affil[1]{Swiss National Supercomputing Centre (CSCS), ETH Zurich, Lugano, Switzerland} +\affil[2]{Laboratory of Hydraulics, Hydrology and Glaciology (VAW), ETH Zurich, Zurich, Switzerland} +\affil[3]{Swiss Federal Institute for Forest, Snow and Landscape Research (WSL), Birmensdorf, Switzerland} + +\keywords{Julia, Distributed Parallelization, xPU, GPU, 
Supercomputing, Stencil Computations, Staggered Grid} + +\hypersetup{ +pdftitle = {Distributed Parallelization of xPU Stencil Computations in Julia}, +pdfsubject = {JuliaCon 2019 Proceedings}, +pdfauthor = {Samuel Omlin, Ludovic R\"ass, Ivan Utkin}, +pdfkeywords = {Julia, Distributed Parallelization, xPU, GPU, Supercomputing, Stencil Computations, Staggered Grid}, +} + diff --git a/paper/jlcode.sty b/paper/jlcode.sty new file mode 100644 index 0000000..affd6a3 --- /dev/null +++ b/paper/jlcode.sty @@ -0,0 +1,420 @@ +%% +%% Julia definition (c) 2018 by wg030 +%% +%% +%% +% keywords, literals and built-ins from: +% https://github.com/isagalaev/highlight.js/blob/master/src/languages/julia.js +% colors from: +% https://docs.julialang.org/en/stable/assets/highlightjs/default.css +% https://docs.julialang.org/en/stable/assets/documenter.css +% special unicode characters from: +% https://docs.julialang.org/en/stable/manual/unicode-input/ + + + + + +% defining the jlcode package +\def\fileversion{2.1} +\def\filedate{2018/03/06} + +\typeout{-- Package: `jlcode' \fileversion\space <\filedate> --} +\NeedsTeXFormat{LaTeX2e} +\ProvidesPackage{jlcode}[\filedate\space\fileversion] + + + + + +% loading required packages +\RequirePackage{listings} +\RequirePackage{xcolor} % for coloring +\RequirePackage{textcomp} % for upright single quotes +\RequirePackage{amssymb} % for the ϰ symbol +\RequirePackage{eurosym} % for the € symbol +\PassOptionsToPackage{T1}{fontenc} +\RequirePackage{fontenc} % for the « and » symbols +\RequirePackage{calc} % for the creation of the code box + + + + + +% julia language definition +\lstdefinelanguage{julia} +{% +% +% julia's keywords: +% +morekeywords=[1] +{% +in,isa,where,baremodule,begin,break,catch,ccall,const,continue,do,else,elseif,% +end,export,finally,for,function,global,if,import,importall,let,local,macro,% +module,quote,return,try,using,while,struct,mutable,primitive,% +% legacy, to be deprecated in the next release 
+type,immutable,abstract,bitstype,typealias% +},% +% +% julia's literals: +% +morekeywords=[2] +{% +true,false,ARGS,C_NULL,DevNull,ENDIAN_BOM,ENV,I,Inf,Inf16,Inf32,Inf64,% +InsertionSort,JULIA_HOME,LOAD_PATH,MergeSort,NaN,NaN16,NaN32,NaN64,% +PROGRAM_FILE,QuickSort,RoundDown,RoundFromZero,RoundNearest,% +RoundNearestTiesAway,RoundNearestTiesUp,RoundToZero,RoundUp,STDERR,STDIN,% +STDOUT,VERSION,catalan,e,eu,eulergamma,golden,im,nothing,pi,γ,π,φ% +},% +% +% julia's built-ins: +% +morekeywords=[3] +{% +ANY,AbstractArray,AbstractChannel,AbstractFloat,AbstractMatrix,AbstractRNG,% +AbstractSerializer,AbstractSet,AbstractSparseArray,AbstractSparseMatrix,% +AbstractSparseVector,AbstractString,AbstractUnitRange,AbstractVecOrMat,% +AbstractVector,Any,ArgumentError,Array,AssertionError,Associative,% +Base64DecodePipe,Base64EncodePipe,Bidiagonal,BigFloat,BigInt,BitArray,% +BitMatrix,BitVector,Bool,BoundsError,BufferStream,CachingPool,% +CapturedException,CartesianIndex,CartesianRange,Cchar,Cdouble,Cfloat,Channel,% +Char,Cint,Cintmax_t,Clong,Clonglong,ClusterManager,Cmd,CodeInfo,Colon,Complex,% +Complex128,Complex32,Complex64,CompositeException,Condition,ConjArray,% +ConjMatrix,ConjVector,Cptrdiff_t,Cshort,Csize_t,Cssize_t,Cstring,Cuchar,Cuint,% +Cuintmax_t,Culong,Culonglong,Cushort,Cwchar_t,Cwstring,DataType,Date,% +DateFormat,DateTime,DenseArray,DenseMatrix,DenseVecOrMat,DenseVector,Diagonal,% +Dict,DimensionMismatch,Dims,DirectIndexString,Display,DivideError,DomainError,% +EOFError,EachLine,Enum,Enumerate,ErrorException,Exception,ExponentialBackOff,% +Expr,Factorization,FileMonitor,Float16,Float32,Float64,Function,Future,% +GlobalRef,GotoNode,HTML,Hermitian,IO,IOBuffer,IOContext,IOStream,IPAddr,IPv4,% +IPv6,IndexCartesian,IndexLinear,IndexStyle,InexactError,InitError,Int,Int128,% +Int16,Int32,Int64,Int8,IntSet,Integer,InterruptException,InvalidStateException,% +Irrational,KeyError,LabelNode,LinSpace,LineNumberNode,LoadError,% 
+LowerTriangular,MIME,Matrix,MersenneTwister,Method,MethodError,MethodTable,% +Module,NTuple,NewvarNode,NullException,Nullable,Number,ObjectIdDict,% +OrdinalRange,OutOfMemoryError,OverflowError,Pair,ParseError,PartialQuickSort,% +PermutedDimsArray,Pipe,PollingFileWatcher,ProcessExitedException,Ptr,QuoteNode,% +RandomDevice,Range,RangeIndex,Rational,RawFD,ReadOnlyMemoryError,Real,% +ReentrantLock,Ref,Regex,RegexMatch,RemoteChannel,RemoteException,RevString,% +RoundingMode,RowVector,SSAValue,SegmentationFault,SerializationState,Set,% +SharedArray,SharedMatrix,SharedVector,Signed,SimpleVector,Slot,SlotNumber,% +SparseMatrixCSC,SparseVector,StackFrame,StackOverflowError,StackTrace,% +StepRange,StepRangeLen,StridedArray,StridedMatrix,StridedVecOrMat,% +StridedVector,String,SubArray,SubString,SymTridiagonal,Symbol,Symmetric,% +SystemError,TCPSocket,Task,Text,TextDisplay,Timer,Tridiagonal,Tuple,Type,% +TypeError,TypeMapEntry,TypeMapLevel,TypeName,TypeVar,TypedSlot,UDPSocket,UInt,% +UInt128,UInt16,UInt32,UInt64,UInt8,UndefRefError,UndefVarError,UnicodeError,% +UniformScaling,Union,UnionAll,UnitRange,Unsigned,UpperTriangular,Val,Vararg,% +VecElement,VecOrMat,Vector,VersionNumber,Void,WeakKeyDict,WeakRef,WorkerConfig,% +WorkerPool% +},% +% +% +sensitive=true,% +% +alsoother={$},%$ +% +morecomment=[l]{\#},% +morecomment=[n]{\#=}{=\#},% +% +morestring=[b]{"},% +morestring=[m]{'},% +morestring=[s]{"""}{"""},% +morestring=[s]{r"}{"},% +morestring=[s]{b"}{"},% +morestring=[s]{v"}{"},% +morestring=[s]{raw"}{"},% +morestring=[s]{L"}{"},% +% +}[keywords,comments,strings] + + +% defining the colors for +\definecolor{jlbase}{rgb}{.28,.28,.28} % julia's base color +\definecolor{jlkeyword}{rgb}{0.4, 0.0, 0.3} % julia's keywords +\definecolor{jlliteral}{HTML}{78A960} % julia's literals +\definecolor{jlbuiltin}{HTML}{397300} % julia's built-ins +\definecolor{jlcomment}{HTML}{888888} % julia's comments +\definecolor{jlstring}{HTML}{880000} % julia's strings 
+\definecolor{jlbackground}{HTML}{F5F5F5} % the background of the code block +\definecolor{jlrule}{HTML}{DDDDDD} % the rule of the code block + + +% defining the ucc and the ucclit command +% for literating special unicode characters; the colour is applied only while \lst@mode equals \lst@Pmode +\newcommand{\ucc}[1]{% +\ifnum\lst@mode=\lst@Pmode\relax% +{\color{jlbase}#1}% +\else% +#1% +\fi% +} + +\newcommand{\ucclit}[1]{% +\ifnum\lst@mode=\lst@Pmode\relax% +{\color{jlliteral}#1}% +\else% +#1% +\fi% +} + + +% defining a new opliterate key +\def\lst@OpLiteratekey#1\@nil@{\let\lst@ifxopliterate\lst@if + \def\lst@opliterate{#1}} +\lst@Key{opliterate}{}{\@ifstar{\lst@true \lst@OpLiteratekey} + {\lst@false\lst@OpLiteratekey}#1\@nil@} +\lst@AddToHook{SelectCharTable} + {\ifx\lst@opliterate\@empty\else + \expandafter\lst@OpLiterate\lst@opliterate{}\relax\z@ + \fi} +\def\lst@OpLiterate#1#2#3{% + \ifx\relax#2\@empty\else + \lst@CArgX #1\relax\lst@CDef + {} + {\let\lst@next\@empty + \lst@ifxopliterate + \lst@ifmode \let\lst@next\lst@CArgEmpty \fi + \fi + \ifx\lst@next\@empty + \ifx\lst@OutputBox\@gobble\else + \lst@XPrintToken \let\lst@scanmode\lst@scan@m + \lst@token{#2}\lst@length#3\relax + \lst@XPrintToken + \fi + \let\lst@next\lst@CArgEmptyGobble + \fi + \lst@next}% + \@empty + \expandafter\lst@OpLiterate + \fi} + + +% defining the \addlitjlbase and \addlitjlstring commands, +% which help a user to fix some of the known manageable issues +\def\addToLiterate#1{% +\protected@edef\lst@literate{% +\unexpanded\expandafter{\lst@literate}\unexpanded{#1}}} +\lst@Key{expandliterate}{}{\addToLiterate{#1}} +\newcommand{\addlitjlbase}[3]{% +\lstset{expandliterate={#1}{{{\color{jlbase}#2}}}{#3}}} +\newcommand{\addlitjlstring}[3]{% +\lstset{expandliterate={#1}{{{\color{jlstring}#2}}}{#3}}} + + + + + +% defining the styles for +\lstset{keywordstyle={[1]\color{jlkeyword}\bfseries}} % julia's keywords +\lstset{keywordstyle={[2]\color{jlliteral}}} % julia's literals +\lstset{keywordstyle={[3]\color{jlbuiltin}}} % julia's built-ins
+\lstset{commentstyle={\color{jlcomment}}} % julia's comments +\lstset{stringstyle={\color{jlstring}}} % julia's strings +\lstset{identifierstyle={\color{jlbase}}} % julia's identifiers + + +\lstset{opliterate=* +% +% julia's operators +% +{\\}{{{\color{jlbase}\lstum@backslash}}}{1} {\{}{{{\color{jlbase}\{}}}{1} +{\}}{{{\color{jlbase}\}}}}{1} {!}{{{\color{jlbase}!}}}{1} +{\%}{{{\color{jlbase}\%}}}{1} {&}{{{\color{jlbase}\&}}}{1} +{(}{{{\color{jlbase}(}}}{1} {)}{{{\color{jlbase})}}}{1} +{*}{{{\color{jlbase}*}}}{1} {+}{{{\color{jlbase}+}}}{1} +{,}{{{\color{jlbase},}}}{1} {-}{{{\color{jlbase}-}}}{1} +{.}{{{\color{jlbase}.}}}{1} {/}{{{\color{jlbase}/}}}{1} +{:}{{{\color{jlbase}:}}}{1} {;}{{{\color{jlbase};}}}{1} +{<}{{{\color{jlbase}<}}}{1} {=}{{{\color{jlbase}=}}}{1} +{>}{{{\color{jlbase}>}}}{1} {?}{{{\color{jlbase}?}}}{1} +{[}{{{\color{jlbase}[}}}{1} {]}{{{\color{jlbase}]}}}{1} +{^}{{{\color{jlbase}\^{}}}}{1} {|}{{{\color{jlbase}|}}}{1} +{~}{{{\color{jlbase}\textasciitilde{}}}}{1} +% +% julia's numbers +% +{.0}{{{\color{jlstring}.0}}}{2} {.1}{{{\color{jlstring}.1}}}{2} +{.2}{{{\color{jlstring}.2}}}{2} {.3}{{{\color{jlstring}.3}}}{2} +{.4}{{{\color{jlstring}.4}}}{2} {.5}{{{\color{jlstring}.5}}}{2} +{.6}{{{\color{jlstring}.6}}}{2} {.7}{{{\color{jlstring}.7}}}{2} +{.8}{{{\color{jlstring}.8}}}{2} {.9}{{{\color{jlstring}.9}}}{2} +% +{e+0}{{{\color{jlstring}e+0}}}{3} {e+1}{{{\color{jlstring}e+1}}}{3} +{e+2}{{{\color{jlstring}e+2}}}{3} {e+3}{{{\color{jlstring}e+3}}}{3} +{e+4}{{{\color{jlstring}e+4}}}{3} {e+5}{{{\color{jlstring}e+5}}}{3} +{e+6}{{{\color{jlstring}e+6}}}{3} {e+7}{{{\color{jlstring}e+7}}}{3} +{e+8}{{{\color{jlstring}e+8}}}{3} {e+9}{{{\color{jlstring}e+9}}}{3} +% +{0E+}{{{\color{jlstring}0E+}}}{3} {1E+}{{{\color{jlstring}1E+}}}{3} +{2E+}{{{\color{jlstring}2E+}}}{3} {3E+}{{{\color{jlstring}3E+}}}{3} +{4E+}{{{\color{jlstring}4E+}}}{3} {5E+}{{{\color{jlstring}5E+}}}{3} +{6E+}{{{\color{jlstring}6E+}}}{3} {7E+}{{{\color{jlstring}7E+}}}{3} 
+{8E+}{{{\color{jlstring}8E+}}}{3} {9E+}{{{\color{jlstring}9E+}}}{3} +% +{e-0}{{{\color{jlstring}e-0}}}{3} {e-1}{{{\color{jlstring}e-1}}}{3} +{e-2}{{{\color{jlstring}e-2}}}{3} {e-3}{{{\color{jlstring}e-3}}}{3} +{e-4}{{{\color{jlstring}e-4}}}{3} {e-5}{{{\color{jlstring}e-5}}}{3} +{e-6}{{{\color{jlstring}e-6}}}{3} {e-7}{{{\color{jlstring}e-7}}}{3} +{e-8}{{{\color{jlstring}e-8}}}{3} {e-9}{{{\color{jlstring}e-9}}}{3} +% +{0E-}{{{\color{jlstring}0E-}}}{3} {1E-}{{{\color{jlstring}1E-}}}{3} +{2E-}{{{\color{jlstring}2E-}}}{3} {3E-}{{{\color{jlstring}3E-}}}{3} +{4E-}{{{\color{jlstring}4E-}}}{3} {5E-}{{{\color{jlstring}5E-}}}{3} +{6E-}{{{\color{jlstring}6E-}}}{3} {7E-}{{{\color{jlstring}7E-}}}{3} +{8E-}{{{\color{jlstring}8E-}}}{3} {9E-}{{{\color{jlstring}9E-}}}{3} +} + + +% special unicode characters +%\lstset{inputencoding=utf8} +%\DeclareUnicodeCharacter{0391}{A} +\lstset{extendedchars=true} +\lstset{literate= +% +% characters that appear in latin languages +% +{á}{{\'a}}{1} {é}{{\'e}}{1} {í}{{\'i}}{1} {ó}{{\'o}}{1} {ú}{{\'u}}{1} +{Á}{{\'A}}{1} {É}{{\'E}}{1} {Í}{{\'I}}{1} {Ó}{{\'O}}{1} {Ú}{{\'U}}{1} +{à}{{\`a}}{1} {è}{{\`e}}{1} {ì}{{\`i}}{1} {ò}{{\`o}}{1} {ù}{{\`u}}{1} +{À}{{\`A}}{1} {È}{{\`E}}{1} {Ì}{{\`I}}{1} {Ò}{{\`O}}{1} {Ù}{{\`U}}{1} +{ä}{{\"a}}{1} {ë}{{\"e}}{1} {ï}{{\"i}}{1} {ö}{{\"o}}{1} {ü}{{\"u}}{1} +{Ä}{{\"A}}{1} {Ë}{{\"E}}{1} {Ï}{{\"I}}{1} {Ö}{{\"O}}{1} {Ü}{{\"U}}{1} +{â}{{\^a}}{1} {ê}{{\^e}}{1} {î}{{\^i}}{1} {ô}{{\^o}}{1} {û}{{\^u}}{1} +{Â}{{\^A}}{1} {Ê}{{\^E}}{1} {Î}{{\^I}}{1} {Ô}{{\^O}}{1} {Û}{{\^U}}{1} +{œ}{{\oe}}{1} {Œ}{{\OE}}{1} {æ}{{\ae}}{1} {Æ}{{\AE}}{1} {ß}{{\ss}}{1} +{ű}{{\H{u}}}{1} {Ű}{{\H{U}}}{1} {ő}{{\H{o}}}{1} {Ő}{{\H{O}}}{1} +{ç}{{\c c}}{1} {Ç}{{\c C}}{1} {ø}{{\o}}{1} {å}{{\r a}}{1} {Å}{{\r A}}{1} +{€}{{\euro}}{1} {£}{{\pounds}}{1} {«}{{\guillemotleft}}{1} +{»}{{\guillemotright}}{1} {ñ}{{\~n}}{1} {Ñ}{{\~N}}{1} {¿}{{?`}}{1} +% +% greek capital letters +% +{Α}{{\ucc{A}}}{1} {Β}{{\ucc{B}}}{1} {Γ}{{\ucc{$\Gamma$}}}{1} +{Δ}{{\ucc{$\Delta$}}}{1}
{Ε}{{\ucc{E}}}{1} {Ζ}{{\ucc{Z}}}{1} +{Η}{{\ucc{H}}}{1} {Θ}{{\ucc{$\Theta$}}}{1} {Ι}{{\ucc{I}}}{1} +{Κ}{{\ucc{K}}}{1} {Λ}{{\ucc{$\Lambda$}}}{1} {Μ}{{\ucc{M}}}{1} +{Ν}{{\ucc{N}}}{1} {Ξ}{{\ucc{$\Xi$}}}{1} {Ο}{{\ucc{O}}}{1} +{Π}{{\ucc{$\Pi$}}}{1} {Ρ}{{\ucc{P}}}{1} {Σ}{{\ucc{$\Sigma$}}}{1} +{Τ}{{\ucc{T}}}{1} {Υ}{{\ucc{$\Upsilon$}}}{1} {Φ}{{\ucc{$\Phi$}}}{1} +{Χ}{{\ucc{X}}}{1} {Ψ}{{\ucc{$\Psi$}}}{1} {Ω}{{\ucc{$\Omega$}}}{1} +% +% mircro sign + latin small letter open e +% +{µ}{{\ucc{$\mu$}}}{1} {ɛ}{{\ucc{$\varepsilon$}}}{1} +% +% greek small letters +% +{α}{{\ucc{$\alpha$}}}{1} {β}{{\ucc{$\beta$}}}{1} {γ}{{\ucclit{$\gamma$}}}{1} +{δ}{{\ucc{$\delta$}}}{1} {ε}{{\ucc{$\varepsilon$}}}{1} +{ϵ}{{\ucc{$\epsilon$}}}{1} {ζ}{{\ucc{$\zeta$}}}{1} {η}{{\ucc{$\eta$}}}{1} +{θ}{{\ucc{$\theta$}}}{1} {ϑ}{{\ucc{$\vartheta$}}}{1} {ι}{{\ucc{$\iota$}}}{1} +{κ}{{\ucc{$\kappa$}}}{1} {ϰ}{{\ucc{$\varkappa$}}}{1} {λ}{{\ucc{$\lambda$}}}{1} +{μ}{{\ucc{$\mu$}}}{1} {ν}{{\ucc{$\nu$}}}{1} {ξ}{{\ucc{$\xi$}}}{1} +{ο}{{\ucc{o}}}{1} {π}{{\ucclit{$\pi$}}}{1} {ϖ}{{\ucc{$\varpi$}}}{1} +{ρ}{{\ucc{$\rho$}}}{1} {ϱ}{{\ucc{$\varrho$}}}{1} {σ}{{\ucc{$\sigma$}}}{1} +{ς}{{\ucc{$\varsigma$}}}{1} {τ}{{\ucc{$\tau$}}}{1} {υ}{{\ucc{$\upsilon$}}}{1} +{φ}{{\ucclit{$\phi$}}}{1} {ϕ}{{\ucc{$\varphi$}}}{1} {χ}{{\ucc{$\chi$}}}{1} +{ψ}{{\ucc{$\psi$}}}{1} {ω}{{\ucc{$\omega$}}}{1} +% +% superscripts +% +{⁽}{{\ucc{${\scriptstyle {}^{(}}$}}}{1} {⁾}{{\ucc{${\scriptstyle {}^{)}}$}}}{1} +{⁺}{{\ucc{${\scriptstyle {}^{+}}$}}}{1} {⁻}{{\ucc{${\scriptstyle {}^{-}}$}}}{1} +{⁰}{{\ucc{${\scriptstyle {}^{0}}$}}}{1} {¹}{{\ucc{${\scriptstyle {}^{1}}$}}}{1} +{²}{{\ucc{${\scriptstyle {}^{2}}$}}}{1} {³}{{\ucc{${\scriptstyle {}^{3}}$}}}{1} +{⁴}{{\ucc{${\scriptstyle {}^{4}}$}}}{1} {⁵}{{\ucc{${\scriptstyle {}^{5}}$}}}{1} +{⁶}{{\ucc{${\scriptstyle {}^{6}}$}}}{1} {⁷}{{\ucc{${\scriptstyle {}^{7}}$}}}{1} +{⁸}{{\ucc{${\scriptstyle {}^{8}}$}}}{1} {⁹}{{\ucc{${\scriptstyle {}^{9}}$}}}{1} +{⁼}{{\ucc{${\scriptstyle {}^{=}}$}}}{1} {ᴬ}{{\ucc{${\scriptstyle 
{}^{A}}$}}}{1} +{ᴮ}{{\ucc{${\scriptstyle {}^{B}}$}}}{1} {ᴰ}{{\ucc{${\scriptstyle {}^{D}}$}}}{1} +{ᴱ}{{\ucc{${\scriptstyle {}^{E}}$}}}{1} {ᴳ}{{\ucc{${\scriptstyle {}^{G}}$}}}{1} +{ᴴ}{{\ucc{${\scriptstyle {}^{H}}$}}}{1} {ᴵ}{{\ucc{${\scriptstyle {}^{I}}$}}}{1} +{ᴶ}{{\ucc{${\scriptstyle {}^{J}}$}}}{1} {ᴷ}{{\ucc{${\scriptstyle {}^{K}}$}}}{1} +{ᴸ}{{\ucc{${\scriptstyle {}^{L}}$}}}{1} {ᴹ}{{\ucc{${\scriptstyle {}^{M}}$}}}{1} +{ᴺ}{{\ucc{${\scriptstyle {}^{N}}$}}}{1} {ᴼ}{{\ucc{${\scriptstyle {}^{O}}$}}}{1} +{ᴾ}{{\ucc{${\scriptstyle {}^{P}}$}}}{1} {ᴿ}{{\ucc{${\scriptstyle {}^{R}}$}}}{1} +{ᵀ}{{\ucc{${\scriptstyle {}^{T}}$}}}{1} {ᵁ}{{\ucc{${\scriptstyle {}^{U}}$}}}{1} +{ⱽ}{{\ucc{${\scriptstyle {}^{V}}$}}}{1} {ᵂ}{{\ucc{${\scriptstyle {}^{W}}$}}}{1} +{ᵃ}{{\ucc{${\scriptstyle {}^{a}}$}}}{1} {ᵇ}{{\ucc{${\scriptstyle {}^{b}}$}}}{1} +{ᶜ}{{\ucc{${\scriptstyle {}^{c}}$}}}{1} {ᵈ}{{\ucc{${\scriptstyle {}^{d}}$}}}{1} +{ᵉ}{{\ucc{${\scriptstyle {}^{e}}$}}}{1} {ᶠ}{{\ucc{${\scriptstyle {}^{f}}$}}}{1} +{ᵍ}{{\ucc{${\scriptstyle {}^{g}}$}}}{1} {ʰ}{{\ucc{${\scriptstyle {}^{h}}$}}}{1} +{ⁱ}{{\ucc{${\scriptstyle {}^{i}}$}}}{1} {ʲ}{{\ucc{${\scriptstyle {}^{j}}$}}}{1} +{ᵏ}{{\ucc{${\scriptstyle {}^{k}}$}}}{1} {ˡ}{{\ucc{${\scriptstyle {}^{l}}$}}}{1} +{ᵐ}{{\ucc{${\scriptstyle {}^{m}}$}}}{1} {ⁿ}{{\ucc{${\scriptstyle {}^{n}}$}}}{1} +{ᵒ}{{\ucc{${\scriptstyle {}^{o}}$}}}{1} {ᵖ}{{\ucc{${\scriptstyle {}^{p}}$}}}{1} +{ʳ}{{\ucc{${\scriptstyle {}^{r}}$}}}{1} {ˢ}{{\ucc{${\scriptstyle {}^{s}}$}}}{1} +{ᵗ}{{\ucc{${\scriptstyle {}^{t}}$}}}{1} {ᵘ}{{\ucc{${\scriptstyle {}^{u}}$}}}{1} +{ᵛ}{{\ucc{${\scriptstyle {}^{v}}$}}}{1} {ʷ}{{\ucc{${\scriptstyle {}^{w}}$}}}{1} +{ˣ}{{\ucc{${\scriptstyle {}^{x}}$}}}{1} {ʸ}{{\ucc{${\scriptstyle {}^{y}}$}}}{1} +{ᶻ}{{\ucc{${\scriptstyle {}^{z}}$}}}{1} +{ᵅ}{{\ucc{${\scriptstyle {}^{\alpha}}$}}}{1} +{ᵝ}{{\ucc{${\scriptstyle {}^{\beta}}$}}}{1} +{ᵞ}{{\ucc{${\scriptstyle {}^{\gamma}}$}}}{1} +{ᵟ}{{\ucc{${\scriptstyle {}^{\delta}}$}}}{1} +{ᵋ}{{\ucc{${\scriptstyle {}^{\varepsilon}}$}}}{1} 
+{ᶿ}{{\ucc{${\scriptstyle {}^{\theta}}$}}}{1} +{ᶥ}{{\ucc{${\scriptstyle {}^{\iota}}$}}}{1} +{ᶲ}{{\ucc{${\scriptstyle {}^{\phi}}$}}}{1} +{ᵡ}{{\ucc{${\scriptstyle {}^{\chi}}$}}}{1} +{ᵠ}{{\ucc{${\scriptstyle {}^{\psi}}$}}}{1} +% +% subscripts +% +{₍}{{\ucc{${\scriptstyle {}_{(}}$}}}{1} {₎}{{\ucc{${\scriptstyle {}_{)}}$}}}{1} +{₊}{{\ucc{${\scriptstyle {}_{+}}$}}}{1} {₋}{{\ucc{${\scriptstyle {}_{-}}$}}}{1} +{₀}{{\ucc{${\scriptstyle {}_{0}}$}}}{1} {₁}{{\ucc{${\scriptstyle {}_{1}}$}}}{1} +{₂}{{\ucc{${\scriptstyle {}_{2}}$}}}{1} {₃}{{\ucc{${\scriptstyle {}_{3}}$}}}{1} +{₄}{{\ucc{${\scriptstyle {}_{4}}$}}}{1} {₅}{{\ucc{${\scriptstyle {}_{5}}$}}}{1} +{₆}{{\ucc{${\scriptstyle {}_{6}}$}}}{1} {₇}{{\ucc{${\scriptstyle {}_{7}}$}}}{1} +{₈}{{\ucc{${\scriptstyle {}_{8}}$}}}{1} {₉}{{\ucc{${\scriptstyle {}_{9}}$}}}{1} +{₌}{{\ucc{${\scriptstyle {}_{=}}$}}}{1} {ₐ}{{\ucc{${\scriptstyle {}_{a}}$}}}{1} +{ₑ}{{\ucc{${\scriptstyle {}_{e}}$}}}{1} {ₕ}{{\ucc{${\scriptstyle {}_{h}}$}}}{1} +{ᵢ}{{\ucc{${\scriptstyle {}_{i}}$}}}{1} {ⱼ}{{\ucc{${\scriptstyle {}_{j}}$}}}{1} +{ₖ}{{\ucc{${\scriptstyle {}_{k}}$}}}{1} {ₗ}{{\ucc{${\scriptstyle {}_{l}}$}}}{1} +{ₘ}{{\ucc{${\scriptstyle {}_{m}}$}}}{1} {ₙ}{{\ucc{${\scriptstyle {}_{n}}$}}}{1} +{ₒ}{{\ucc{${\scriptstyle {}_{o}}$}}}{1} {ₚ}{{\ucc{${\scriptstyle {}_{p}}$}}}{1} +{ᵣ}{{\ucc{${\scriptstyle {}_{r}}$}}}{1} {ₛ}{{\ucc{${\scriptstyle {}_{s}}$}}}{1} +{ₜ}{{\ucc{${\scriptstyle {}_{t}}$}}}{1} {ᵤ}{{\ucc{${\scriptstyle {}_{u}}$}}}{1} +{ᵥ}{{\ucc{${\scriptstyle {}_{v}}$}}}{1} {ₓ}{{\ucc{${\scriptstyle {}_{x}}$}}}{1} +{ᵦ}{{\ucc{${\scriptstyle {}_{\beta}}$}}}{1} +{ᵧ}{{\ucc{${\scriptstyle {}_{\gamma}}$}}}{1} +{ᵨ}{{\ucc{${\scriptstyle {}_{\rho}}$}}}{1} +{ᵪ}{{\ucc{${\scriptstyle {}_{\chi}}$}}}{1} +{ᵩ}{{\ucc{${\scriptstyle {}_{\psi}}$}}}{1} +% +} + + + + + +% basic font +\makeatletter +\def\lstbasicfont{% + \color{jlstring}% + \ttfamily% + \lst@ifdisplaystyle\scriptsize\fi% +} +\makeatother + +% general style of the code block +\lstset{basicstyle={\lstbasicfont}} 
+\lstset{showstringspaces=false} +\lstset{upquote=true} +\lstset{tabsize=4} +\lstset{aboveskip={1.5\baselineskip},belowskip={1.5\baselineskip}} + +% creating the code box +\lstset{backgroundcolor=\color{jlbackground}, rulecolor=\color{jlrule}} +\lstset{frame=single, frameround=tttt} +\lstset{columns=fixed} +\newlength{\bfem} +\settowidth{\bfem}{\lstbasicfont{m}} +\newlength{\xmrgn} +\setlength{\xmrgn}{(\textwidth - 80\bfem)*\real{0.5}} +\lstset{basewidth=\bfem} + +% activating the julia style +\lstset{language=julia} diff --git a/paper/journal_dat.tex b/paper/journal_dat.tex new file mode 100644 index 0000000..68bbf06 --- /dev/null +++ b/paper/journal_dat.tex @@ -0,0 +1,6 @@ +% **************GENERATED FILE, DO NOT EDIT************** + +\def\@journalName{Proceedings of JuliaCon} +\def\@volume{1} +\def\@issue{1} +\def\@year{2020} diff --git a/paper/julia_c_gpu_par_eff_lin.png b/paper/julia_c_gpu_par_eff_lin.png new file mode 100644 index 0000000..f679005 Binary files /dev/null and b/paper/julia_c_gpu_par_eff_lin.png differ diff --git a/paper/julia_gpu_par_eff.png b/paper/julia_gpu_par_eff.png new file mode 100644 index 0000000..6067b17 Binary files /dev/null and b/paper/julia_gpu_par_eff.png differ diff --git a/paper/juliacon.bst b/paper/juliacon.bst new file mode 100644 index 0000000..49de64f --- /dev/null +++ b/paper/juliacon.bst @@ -0,0 +1,1097 @@ +% BibTeX standard bibliography style `plain' + % version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. + % Copyright (C) 1985, all rights reserved. + % Copying of this file is authorized only if either + % (1) you make absolutely no changes to your copy, including name, or + % (2) if you do make changes, you name it something other than + % btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. + % This restriction helps ensure that all standard styles are identical. + % The file btxbst.doc has the documentation for this style. 
+ +ENTRY + { address + author + booktitle + chapter + edition + editor + howpublished + institution + journal + key + month + note + number + organization + pages + publisher + school + series + title + type + volume + year + } + {} + { label } + +INTEGERS { output.state before.all mid.sentence after.sentence after.block } + +FUNCTION {init.state.consts} +{ #0 'before.all := + #1 'mid.sentence := + #2 'after.sentence := + #3 'after.block := +} + +STRINGS { s t } + +FUNCTION {output.nonnull} +{ 's := + output.state mid.sentence = + { ", " * write$ } + { output.state after.block = + { add.period$ write$ + newline$ + "" write$ + } + { output.state before.all = + 'write$ + { add.period$ " " * write$ } + if$ + } + if$ + mid.sentence 'output.state := + } + if$ + s +} + +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ + 'output.nonnull + if$ +} + +FUNCTION {output.check} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} + +FUNCTION {output.bibitem} +{ newline$ + "\bibitem{" write$ + cite$ write$ + "}" write$ + newline$ + "" + before.all 'output.state := +} + +FUNCTION {fin.entry} +{ add.period$ + write$ + newline$ +} + +FUNCTION {new.block} +{ output.state before.all = + 'skip$ + { after.block 'output.state := } + if$ +} + +FUNCTION {new.sentence} +{ output.state after.block = + 'skip$ + { output.state before.all = + 'skip$ + { after.sentence 'output.state := } + if$ + } + if$ +} + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} + +FUNCTION {and} +{ 'skip$ + { pop$ #0 } + if$ +} + +FUNCTION {or} +{ { pop$ #1 } + 'skip$ + if$ +} + +FUNCTION {new.block.checka} +{ empty$ + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.block.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.sentence.checka} +{ empty$ + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {new.sentence.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {field.or.null} +{ duplicate$ empty$ + { pop$ 
"" } + 'skip$ + if$ +} + +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "{\em " swap$ * "}" * } + if$ +} + +INTEGERS { nameptr namesleft numnames } + +FUNCTION {format.names} +{ 's := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't := + nameptr #1 > + { namesleft #1 > + { ", " * t * } + { numnames #2 > + { "," * } + 'skip$ + if$ + t "others" = + { " et~al." * } + { " and " * t * } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {format.authors} +{ author empty$ + { "" } + { author format.names } + if$ +} + +FUNCTION {format.editors} +{ editor empty$ + { "" } + { editor format.names + editor num.names$ #1 > + { ", editors" * } + { ", editor" * } + if$ + } + if$ +} + +FUNCTION {format.title} +{ title empty$ + { "" } + { title "t" change.case$ } + if$ +} + +FUNCTION {n.dashify} +{ 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + +FUNCTION {format.date} +{ year empty$ + { month empty$ + { "" } + { "there's a month but no year in " cite$ * warning$ + month + } + if$ + } + { month empty$ + 'year + { month " " * year * } + if$ + } + if$ +} + +FUNCTION {format.btitle} +{ title emphasize +} + +FUNCTION {tie.or.space.connect} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ * * +} + +FUNCTION {either.or.check} +{ empty$ + 'pop$ + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} + +FUNCTION {format.bvolume} +{ volume empty$ + { "" } + { "volume" volume tie.or.space.connect + series empty$ + 'skip$ + { " of " * series emphasize * } + if$ + "volume and number" number 
either.or.check + } + if$ +} + +FUNCTION {format.number.series} +{ volume empty$ + { number empty$ + { series field.or.null } + { output.state mid.sentence = + { "number" } + { "Number" } + if$ + number tie.or.space.connect + series empty$ + { "there's a number but no series in " cite$ * warning$ } + { " in " * series * } + if$ + } + if$ + } + { "" } + if$ +} + +FUNCTION {format.edition} +{ edition empty$ + { "" } + { output.state mid.sentence = + { edition "l" change.case$ " edition" * } + { edition "t" change.case$ " edition" * } + if$ + } + if$ +} + +INTEGERS { multiresult } + +FUNCTION {multi.page.check} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} + +FUNCTION {format.pages} +{ pages empty$ + { "" } + { pages multi.page.check + { "pages" pages n.dashify tie.or.space.connect } + { "page" pages tie.or.space.connect } + if$ + } + if$ +} + +FUNCTION {format.vol.num.pages} +{ volume field.or.null + number empty$ + 'skip$ + { "(" number * ")" * * + volume empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + } + if$ + pages empty$ + 'skip$ + { duplicate$ empty$ + { pop$ format.pages } + { ":" * pages n.dashify * } + if$ + } + if$ +} + +FUNCTION {format.chapter.pages} +{ chapter empty$ + 'format.pages + { type empty$ + { "chapter" } + { type "l" change.case$ } + if$ + chapter tie.or.space.connect + pages empty$ + 'skip$ + { ", " * format.pages * } + if$ + } + if$ +} + +FUNCTION {format.in.ed.booktitle} +{ booktitle empty$ + { "" } + { editor empty$ + { "In " booktitle emphasize * } + { "In " format.editors * ", " * booktitle emphasize * } + if$ + } + if$ +} + +FUNCTION {empty.misc.check} +{ author empty$ title empty$ howpublished empty$ + month empty$ year empty$ note empty$ + and and and and and + key empty$ not and + { 
"all relevant fields are empty in " cite$ * warning$ } + 'skip$ + if$ +} + +FUNCTION {format.thesis.type} +{ type empty$ + 'skip$ + { pop$ + type "t" change.case$ + } + if$ +} + +FUNCTION {format.tr.number} +{ type empty$ + { "Technical Report" } + 'type + if$ + number empty$ + { "t" change.case$ } + { number tie.or.space.connect } + if$ +} + +FUNCTION {format.article.crossref} +{ key empty$ + { journal empty$ + { "need key or journal for " cite$ * " to crossref " * crossref * + warning$ + "" + } + { "In {\em " journal * "\/}" * } + if$ + } + { "In " key * } + if$ + " \cite{" * crossref * "}" * +} + +FUNCTION {format.crossref.editor} +{ editor #1 "{vv~}{ll}" format.name$ + editor num.names$ duplicate$ + #2 > + { pop$ " et~al." * } + { #2 < + 'skip$ + { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = + { " et~al." * } + { " and " * editor #2 "{vv~}{ll}" format.name$ * } + if$ + } + if$ + } + if$ +} + +FUNCTION {format.book.crossref} +{ volume empty$ + { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ + "In " + } + { "Volume" volume tie.or.space.connect + " of " * + } + if$ + editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { series empty$ + { "need editor, key, or series for " cite$ * " to crossref " * + crossref * warning$ + "" * + } + { "{\em " * series * "\/}" * } + if$ + } + { key * } + if$ + } + { format.crossref.editor * } + if$ + " \cite{" * crossref * "}" * +} + +FUNCTION {format.incoll.inproc.crossref} +{ editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { booktitle empty$ + { "need editor, key, or booktitle for " cite$ * " to crossref " * + crossref * warning$ + "" + } + { "In {\em " booktitle * "\/}" * } + if$ + } + { "In " key * } + if$ + } + { "In " format.crossref.editor * } + if$ + " \cite{" * crossref * "}" * +} + +FUNCTION {article} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + crossref 
missing$ + { journal emphasize "journal" output.check + format.vol.num.pages output + format.date "year" output.check + } + { format.article.crossref output.nonnull + format.pages output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {book} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {booklet} +{ output.bibitem + format.authors output + new.block + format.title "title" output.check + howpublished address new.block.checkb + howpublished output + address output + format.date output + new.block + note output + fin.entry +} + +FUNCTION {inbook} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + format.chapter.pages "chapter and pages" output.check + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { format.chapter.pages "chapter and pages" output.check + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {incollection} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + crossref missing$ + { 
format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.chapter.pages output + new.sentence + publisher "publisher" output.check + address output + format.edition output + format.date "year" output.check + } + { format.incoll.inproc.crossref output.nonnull + format.chapter.pages output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {inproceedings} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.pages output + address empty$ + { organization publisher new.sentence.checkb + organization output + publisher output + format.date "year" output.check + } + { address output.nonnull + format.date "year" output.check + new.sentence + organization output + publisher output + } + if$ + } + { format.incoll.inproc.crossref output.nonnull + format.pages output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {conference} { inproceedings } + +FUNCTION {manual} +{ output.bibitem + author empty$ + { organization empty$ + 'skip$ + { organization output.nonnull + address output + } + if$ + } + { format.authors output.nonnull } + if$ + new.block + format.btitle "title" output.check + author empty$ + { organization empty$ + { address new.block.checka + address output + } + 'skip$ + if$ + } + { organization address new.block.checkb + organization output + address output + } + if$ + format.edition output + format.date output + new.block + note output + fin.entry +} + +FUNCTION {mastersthesis} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + "Master's thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {misc} +{ 
output.bibitem + format.authors output + title howpublished new.block.checkb + format.title output + howpublished new.block.checka + howpublished output + format.date output + new.block + note output + fin.entry + empty.misc.check +} + +FUNCTION {phdthesis} +{ output.bibitem + format.authors "author" output.check + new.block + format.btitle "title" output.check + new.block + "PhD thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {proceedings} +{ output.bibitem + editor empty$ + { organization output } + { format.editors output.nonnull } + if$ + new.block + format.btitle "title" output.check + format.bvolume output + format.number.series output + address empty$ + { editor empty$ + { publisher new.sentence.checka } + { organization publisher new.sentence.checkb + organization output + } + if$ + publisher output + format.date "year" output.check + } + { address output.nonnull + format.date "year" output.check + new.sentence + editor empty$ + 'skip$ + { organization output } + if$ + publisher output + } + if$ + new.block + note output + fin.entry +} + +FUNCTION {techreport} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + format.tr.number output.nonnull + institution "institution" output.check + address output + format.date "year" output.check + new.block + note output + fin.entry +} + +FUNCTION {unpublished} +{ output.bibitem + format.authors "author" output.check + new.block + format.title "title" output.check + new.block + note "note" output.check + format.date output + fin.entry +} + +FUNCTION {default.type} { misc } + +MACRO {jan} {"January"} + +MACRO {feb} {"February"} + +MACRO {mar} {"March"} + +MACRO {apr} {"April"} + +MACRO {may} {"May"} + +MACRO {jun} {"June"} + +MACRO {jul} {"July"} + +MACRO {aug} {"August"} + +MACRO {sep} {"September"} + +MACRO {oct} {"October"} + 
+MACRO {nov} {"November"} + +MACRO {dec} {"December"} + +MACRO {acmcs} {"ACM Computing Surveys"} + +MACRO {acta} {"Acta Informatica"} + +MACRO {cacm} {"Communications of the ACM"} + +MACRO {ibmjrd} {"IBM Journal of Research and Development"} + +MACRO {ibmsj} {"IBM Systems Journal"} + +MACRO {ieeese} {"IEEE Transactions on Software Engineering"} + +MACRO {ieeetc} {"IEEE Transactions on Computers"} + +MACRO {ieeetcad} + {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} + +MACRO {ipl} {"Information Processing Letters"} + +MACRO {jacm} {"Journal of the ACM"} + +MACRO {jcss} {"Journal of Computer and System Sciences"} + +MACRO {scp} {"Science of Computer Programming"} + +MACRO {sicomp} {"SIAM Journal on Computing"} + +MACRO {tocs} {"ACM Transactions on Computer Systems"} + +MACRO {tods} {"ACM Transactions on Database Systems"} + +MACRO {tog} {"ACM Transactions on Graphics"} + +MACRO {toms} {"ACM Transactions on Mathematical Software"} + +MACRO {toois} {"ACM Transactions on Office Information Systems"} + +MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} + +MACRO {tcs} {"Theoretical Computer Science"} + +READ + +FUNCTION {sortify} +{ purify$ + "l" change.case$ +} + +INTEGERS { len } + +FUNCTION {chop.word} +{ 's := + 'len := + s #1 len substring$ = + { s len #1 + global.max$ substring$ } + 's + if$ +} + +FUNCTION {sort.format.names} +{ 's := + #1 'nameptr := + "" + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { nameptr #1 > + { " " * } + 'skip$ + if$ + s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't := + nameptr numnames = t "others" = and + { "et al" * } + { t sortify * } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {sort.format.title} +{ 't := + "A " #2 + "An " #3 + "The " #4 t chop.word + chop.word + chop.word + sortify + #1 global.max$ substring$ +} + +FUNCTION {author.sort} +{ author empty$ + { key empty$ + { "to sort, need author or key 
in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.editor.sort} +{ author empty$ + { editor empty$ + { key empty$ + { "to sort, need author, editor, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.organization.sort} +{ author empty$ + { organization empty$ + { key empty$ + { "to sort, need author, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {editor.organization.sort} +{ editor empty$ + { organization empty$ + { key empty$ + { "to sort, need editor, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { editor sort.format.names } + if$ +} + +FUNCTION {presort} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.sort + { type$ "proceedings" = + 'editor.organization.sort + { type$ "manual" = + 'author.organization.sort + 'author.sort + if$ + } + if$ + } + if$ + " " + * + year field.or.null sortify + * + " " + * + title field.or.null + sort.format.title + * + #1 entry.max$ substring$ + 'sort.key$ := +} + +ITERATE {presort} + +SORT + +STRINGS { longest.label } + +INTEGERS { number.label longest.label.width } + +FUNCTION {initialize.longest.label} +{ "" 'longest.label := + #1 'number.label := + #0 'longest.label.width := +} + +FUNCTION {longest.label.pass} +{ number.label int.to.str$ 'label := + number.label #1 + 'number.label := + label width$ longest.label.width > + { label 'longest.label := + label width$ 'longest.label.width := + } + 'skip$ + if$ +} + +EXECUTE {initialize.longest.label} + +ITERATE {longest.label.pass} + +FUNCTION {begin.bib} +{ preamble$ empty$ + 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" longest.label 
* "}" * write$ newline$ +} + +EXECUTE {begin.bib} + +EXECUTE {init.state.consts} + +ITERATE {call.type$} + +FUNCTION {end.bib} +{ newline$ + "\end{thebibliography}" write$ newline$ +} + +EXECUTE {end.bib} diff --git a/paper/juliacon.cls b/paper/juliacon.cls new file mode 100644 index 0000000..e2c156f --- /dev/null +++ b/paper/juliacon.cls @@ -0,0 +1,944 @@ +%% juliacon.cls - version 1.0 + +%% Inspired by the template from the International Journal of Computer Applications (IJCA) + +\usepackage[scaled=0.92]{helvet} +\def\fileversion{v1.0} +\def\filedate{2019 04 07} +% +\NeedsTeXFormat{LaTeX2e} +\ProvidesClass{juliacon} +\RequirePackage{latexsym} +\RequirePackage{url} + +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} + +\newif\ifmanuscript +\@twosidetrue\@mparswitchtrue +% +\newdimen\trimheight +\newdimen\trimwidth +\newdimen\typeheight +\newdimen\typewidth +\newdimen\normaltextheight +\newdimen\blindfoliodrop +\newbox\tempbox +%% + +\input{journal_dat} + +% +\frenchspacing % oh lala bravo quelle belle idée +\DeclareOption{manuscript}{\manuscripttrue} +\DeclareOption{letterpaper} + {\setlength\paperheight {11.69in}% + \setlength\paperwidth {8.27in}% + \def\special@paper{8.5in,11in} + \special{papersize=8.5in,11in}} + +\DeclareOption{openbib}{% + \AtEndOfPackage{% + \renewcommand\@openbib@code{% + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + }% + \renewcommand\newblock{\par}}% +} +% +\ExecuteOptions{letterpaper} +\ProcessOptions +% +\newcommand\refname{References} +\newcommand{\ignoretwo}[2]{} +\newcommand{\yearTwoDigits}{\expandafter\ignoretwo\the\year} +\def\@setref#1#2#3{% + \ifx#1\relax + \number 0\relax + \protect\G@refundefinedtrue + \nfss@text{\reset@font\bfseries ??}% + \@latex@warning{Reference `#3' on page \thepage \space undefined}% + \else + \expandafter#2#1\null + \fi} +% +% +\lineskip 1pt \normallineskip 1pt +\ifmanuscript +\def\baselinestretch{2} +\else +\def\baselinestretch{1} +\fi 
+\def\@ixpt{9} +\renewcommand\normalsize{% + \@setfontsize\normalsize\@ixpt{10pt} + \abovedisplayskip 6pt plus2pt minus1pt\belowdisplayskip \abovedisplayskip + \abovedisplayshortskip 6pt plus0pt minus 3pt + \belowdisplayshortskip 6pt plus0pt minus3pt\let\@listi\@listI} + +\newcommand\small{% + \@setfontsize\small\@ixpt{11pt}% + \abovedisplayskip 5pt plus 2pt minus 1pt\belowdisplayskip \abovedisplayskip + \abovedisplayshortskip 5pt plus0pt minus2pt\belowdisplayshortskip 5pt plus0pt + minus 2pt + \def\@listi{\leftmargin\leftmargini \topsep 5pt plus 2pt minus 1pt\parsep 0pt + plus .7pt + \itemsep 1.6pt plus .8pt}} +\newcommand\footnotesize{% +% \@setfontsize\footnotesize\@viiipt{10pt} + \@setsize\footnotesize{10pt}\viiipt\@viiipt + \abovedisplayskip 4pt plus 1pt minus 0pt\belowdisplayskip \abovedisplayskip + \abovedisplayshortskip 4pt plus 0pt minus 1pt\belowdisplayshortskip 4pt plus + 0pt minus 1pt + \def\@listi{\leftmargin\leftmargini \topsep 4pt plus 1pt minus + 0pt\parsep 0pt plus .5pt + \itemsep 1pt plus .7pt}} + +\newcommand\scriptsize{\@setfontsize\scriptsize\@viipt\@viiipt} +\newcommand\tiny{\@setfontsize\tiny\@vpt\@vipt} +\newcommand\large{\@setfontsize\large\@xiipt{14}} +\newcommand\Large{\@setfontsize\Large\@xivpt{18}} +\newcommand\LARGE{\@setfontsize\LARGE\@xviipt{20}} +\newcommand\huge{\@setfontsize\huge\@xxpt{25}} +\newcommand\Huge{\@setfontsize\Huge\@xxvpt{30}} +% +\normalsize +% +\newdimen\tempdimen +% +\setlength\trimheight{11in} +\setlength\trimwidth{8.5in} +% +\typeheight52.5pc +\typewidth42pc +\textheight52.5pc +\textwidth42pc +\advance\textheight-3pt +\newdimen\normaltextheight +\setlength\normaltextheight{\textheight} +\oddsidemargin4.5pc +\evensidemargin4.5pc +\topmargin20pt %.25in +\headheight 6pt% +\headsep 29.2pt% +\topskip6pt% +\footskip 100pt +% +\marginparwidth 0.5in +\marginparsep .125in +\columnsep24pt +\columnseprule 0pt +% +\def\titlefont{\huge\selectfont\centering\mathversion{bold}} 
+\def\authorfont{\fontfamily{phv}\fontsize{10}{12}\selectfont\rightskip0pt plus1fill} %\mathversion{sfnormal} +\def\rhfont{\fontfamily{phv}\fontsize{9}{10}\selectfont\mathversion{sfnormal}} + +\def\sectionfont{\fontfamily{ptm}\fontsize{9}{12}\capsshape\selectfont\raggedright} %\mathversion{rmnormal} +\def\subsectionfont{\fontfamily{ptm}\fontsize{9}{12}\selectfont} %\mathversion{rmnormal} +\def\figcaptionfont{\fontsize{8}{10}\selectfont\mathversion{normal}}% +\def\subcaptionfont{\fontsize{8}{10}\selectfont\mathversion{normal}}% +\def\subcaption#1{{\centering\subcaptionfont#1\par}} +% +\def\tablefont{\fontsize{8}{10}\selectfont}% +\def\tablecaptionfont{\fontsize{9}{11}\selectfont\centering}% +\def\tablenumfont{\fontsize{9}{11}\selectfont}% +\def\tabnotefont{\fontsize{7}{9}\selectfont} +% +\def\encodingdefault{OT1}% +\fontencoding{OT1}% +% +\DeclareFontShape{OMS}{cmsy}{m}{n}{<-> cmsy10 }{} +\DeclareFontShape{OMS}{cmsy}{b}{n}{<-> cmbsy10 }{} +\def\cal{\mathcal} +% +\def\boldmath{\mathversion{bold}} +\def\bm#1{\mathchoice + {\mbox{\boldmath$\displaystyle#1$}}% + {\mbox{\boldmath$#1$}}% + {\mbox{\boldmath$\scriptstyle#1$}}% + {\mbox{\boldmath$\scriptscriptstyle#1$}}} +% +\footnotesep 7pt +\skip\footins 15pt plus 4pt minus 3pt +\floatsep 12pt plus 2pt minus 2pt +\textfloatsep \floatsep +\intextsep 1pc plus 1pc +\dblfloatsep 12pt plus 2pt minus 2pt +\dbltextfloatsep 20pt plus 2pt minus 4pt +\@fptop 0pt plus 1fil \@fpsep 1pc plus 2fil \@fpbot 0pt plus 1fil +\@dblfptop 0pt plus 1fil \@dblfpsep 8pt plus 2fil \@dblfpbot 0pt plus 1fil +\marginparpush 6pt +\parskip 0pt \parindent 0pt \partopsep 0pt % plus .1pt FBU +\@lowpenalty 51 \@medpenalty 151 \@highpenalty 301 +\@beginparpenalty -\@lowpenalty \@endparpenalty -\@lowpenalty \@itempenalty +-\@lowpenalty +% +\def\part{\@ucheadtrue + \@startsection{part}{9}{\z@}{-10pt plus -4pt minus + -2pt}{4pt}{\reset@font\normalsize\rmfamily}} +\def\section{\@ucheadtrue + \@startsection{section}{1}{\z@}{-10pt plus -4pt minus + 
-2pt}{6pt}{\reset@font\fontsize{10}{12}\raggedright\rmfamily\bfseries}} +\def\subsection{\@ucheadfalse + \@startsection{subsection}{2}{\z@}{-8pt plus -2pt minus + -1pt}{6pt}{\reset@font\fontsize{10}{12}\raggedright\rmfamily\bfseries}} +\def\subsubsection{\@ucheadfalse + \@startsection{subsubsection}{3}{\parindent}{6pt plus +1pt}{-5pt}{\reset@font\fontsize{9}{10}\itshape}} +\def\paragraph{\@ucheadfalse + \@startsection{paragraph}{3}{\parindent}{6pt plus +1pt}{-5pt}{\reset@font\fontsize{10}{12}\itshape}} +%% +\renewcommand{\@seccntformat}[1]{\textup{\csname the#1\endcsname}} +\gdef\@period{.} +\def\@trivlist{\@topsepadd\topsep +\if@noskipsec \gdef\@period{}\leavevmode\gdef\@period{.}\fi + \ifvmode \advance\@topsepadd\partopsep \else \unskip\par\fi + \if@inlabel \@noparitemtrue \@noparlisttrue + \else \@noparlistfalse \@topsep\@topsepadd \fi + \advance\@topsep \parskip + \leftskip\z@\rightskip\@rightskip \parfillskip\@flushglue + \@setpar{\if@newlist\else{\@@par}\fi} \global\@newlisttrue +\@outerparskip\parskip} +% +\def\@startsection#1#2#3#4#5#6{% + \if@noskipsec \leavevmode \fi + \par + \@tempskipa #4\relax + \@afterindenttrue + \ifdim \@tempskipa <\z@ + \@tempskipa -\@tempskipa \@afterindentfalse + \fi + \if@nobreak + \everypar{}% + \ifnum#2=2 + \vskip-2pt + \fi + \else + \addpenalty\@secpenalty\addvspace\@tempskipa + \fi + \@ifstar + {\@ssect{#3}{#4}{#5}{#6}}% + {\@dblarg{\@sect{#1}{#2}{#3}{#4}{#5}{#6}}}} +% +\def\@sect#1#2#3#4#5#6[#7]#8{% + \ifnum #2>\c@secnumdepth + \let\@svsec\@empty + \else + \refstepcounter{#1}% + \if@uchead% + \protected@edef\@svsec{\@seccntformat{#1}.\quad\relax}% + \else% + \protected@edef\@svsec{\@seccntformat{#1}\quad\relax}% + \fi% + \fi + \@tempskipa #5\relax + \ifdim \@tempskipa>\z@ + \begingroup + #6{% + \@hangfrom{\hskip #3\relax\@svsec}% + \interlinepenalty \@M #8 \@@par}% + \endgroup + \csname #1mark\endcsname{#7}% + \addcontentsline{toc}{#1}{% + \ifnum #2>\c@secnumdepth \else + \protect\numberline{\csname the#1\endcsname}% + \fi 
+ #7}% + \else + \def\@svsechd{% + #6{\hskip #3\relax + \@svsec \if@uchead\Makeuppercase{#8}\else#8\fi}% + \csname #1mark\endcsname{#7}% + \addcontentsline{toc}{#1}{% + \ifnum #2>\c@secnumdepth \else + \protect\numberline{\csname the#1\endcsname}% + \fi + #7}}% + \fi + \@xsect{#5}} + +\def\@xsect#1{\@tempskipa #1\relax + \ifdim \@tempskipa>\z@ + \par \nobreak + \vskip \@tempskipa + \@afterheading + \else \global\@nobreakfalse \global\@noskipsectrue + \everypar{\if@noskipsec \global\@noskipsecfalse + \clubpenalty\@M \hskip -\parindent + \begingroup \@svsechd\@period \endgroup \unskip + \hskip -#1 + \else \clubpenalty \@clubpenalty + \everypar{}\fi}\fi\ignorespaces} +\newif\if@uchead\@ucheadfalse +% +\setcounter{secnumdepth}{3} +\newcounter{secnumbookdepth} +\setcounter{secnumbookdepth}{3} +\newfont{\apbf}{cmbx9} +\def\appendix{\par + \setcounter{section}{0} + \setcounter{subsection}{0} + \section*{APPENDIX}\vskip10pt + \def\thesection{\Alph{section}} + \def\theHsection{\Alph{section}}} +% +\labelsep 4pt +\settowidth{\leftmargini}{(9)} \addtolength\leftmargini\labelsep +\settowidth{\leftmarginii}{(b)} \addtolength\leftmarginii\labelsep +\leftmarginiii \leftmarginii +\leftmarginiv \leftmarginii +\leftmarginv \leftmarginii +\leftmarginvi \leftmarginii +\leftmargin\leftmargini +\labelwidth\leftmargini\advance\labelwidth-\labelsep +\def\@listI{\leftmargin\leftmargini \parsep 0pt plus 1pt\topsep 6pt plus 2pt +minus 2pt\itemsep 2pt plus 1pt minus .5pt} +\let\@listi\@listI +\@listi +\def\@listii{\leftmargin\leftmarginii + \labelwidth\leftmarginii\advance\labelwidth-\labelsep + \topsep 0pt plus 1pt + \parsep 0pt plus .5pt + \itemsep \parsep} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii\advance\labelwidth-\labelsep + \topsep 0pt plus 1pt + \parsep 0pt plus .5pt + \itemsep \parsep} +\def\@listiv{\leftmargin\leftmarginiv + \labelwidth\leftmarginiv\advance\labelwidth-\labelsep} +\def\@listv{\leftmargin\leftmarginv + 
\labelwidth\leftmarginv\advance\labelwidth-\labelsep} +\def\@listvi{\leftmargin\leftmarginvi + \labelwidth\leftmarginvi\advance\labelwidth-\labelsep} +% +\def\enumerate{\ifnum \@enumdepth >3 \@toodeep\else + \advance\@enumdepth \@ne + \edef\@enumctr{enum\romannumeral\the\@enumdepth}\list + {\csname label\@enumctr\endcsname}{\usecounter + {\@enumctr}\def\makelabel##1{##1\hss}}\fi} +\def\longenum{\ifnum \@enumdepth >3 \@toodeep\else + \advance\@enumdepth \@ne + \edef\@enumctr{enum\romannumeral\the\@enumdepth}\list + {\csname label\@enumctr\endcsname}{\usecounter + {\@enumctr}\labelwidth\z@}\fi} +\let\endlongenum\endlist +\def\labelenumi{{\rm (}\arabic{enumi}\/{\rm )}} +\def\theenumi{\arabic{enumi}} +\def\labelenumii{{\rm (}\alph{enumii}\rm{)}} +\def\theenumii{\alph{enumii}} +\def\p@enumii{\theenumi} +\def\labelenumiii{\roman{enumiii}.} +\def\theenumiii{\roman{enumiii}} +\def\p@enumiii{\theenumi{\rm (}\theenumii{\rm )}} +\def\labelenumiv{\Alph{enumiv}.} +\def\theenumiv{\Alph{enumiv}} +\renewcommand\theenumiv{\@Alph\c@enumiv} +\def\p@enumiv{\p@enumiii\theenumiii} + +\def\p@enumiv{\p@enumiii\theenumiii} + +\renewcommand\p@enumii{\theenumi} +\renewcommand\p@enumiii{\theenumi(\theenumii)} +\renewcommand\p@enumiv{\p@enumiii\theenumiii} + +\def\itemize{\list{---\hskip -\labelsep}{\settowidth + {\leftmargin}{---}\labelwidth\leftmargin + \addtolength{\labelwidth}{-\labelsep}}} +\let\enditemize\endlist +\def\longitem{\list{---}{\labelwidth\z@ + \leftmargin\z@ \itemindent\parindent \advance\itemindent\labelsep}} +\let\endlongitem\endlist +\def\verse{\let\\=\@centercr + \list{}{\leftmargin 2pc + \itemindent -1.5em\listparindent \itemindent + \rightmargin\leftmargin\advance\leftmargin 1.5em}\item[]} +\let\endverse\endlist +\def\quotation{\list{}{\leftmargin 2pc \listparindent .5em + \itemindent\listparindent + \rightmargin\leftmargin \parsep 0pt plus 1pt}\item[]} +\let\endquotation=\endlist +\def\quote{\list{}{\leftmargin 2pc \rightmargin\leftmargin}\item[]} 
+\let\endquote=\endlist + +% +\newenvironment{unnumlist}{% + \list{}{% + \listparindent\parindent + \itemindent-1em + \leftmargin1em + \parsep0pt + \itemsep2pt + \partopsep0pt} + \def\makelable##1{##1}% +}{\endlist}% +% +\def\description{\list{}{\listparindent\parindent\labelwidth\z@ + \leftmargin\z@ \itemindent\parindent\advance\itemindent\labelsep + \def\makelabel##1{\it ##1}}} +\let\enddescription\endlist +% +\def\describe#1{\list{}{\listparindent\parindent\settowidth{\labelwidth}{#1}\leftmargin + \labelwidth\addtolength\leftmargin\labelsep\def\makelabel##1{##1\hfil}}} +\let\enddescribe\endlist +% +\def\program{\ifx\@currsize\normalsize\small \else \rm \fi\tabbing} +\let\endprogram\endtabbing +% +\newtheorem{theorem}{Theorem} +\newtheorem{strategy}{Strategy} +\newtheorem{property}{Property} +\newtheorem{proposition}{Proposition} +\newtheorem{lemma}[theorem]{Lemma} +\newtheorem{exam}{Example} +\newenvironment{example}{% +\italicenvfalse +\begin{exam}}{\end{exam}\italicenvtrue} +% +\newtheorem{defi}[theorem]{Definition} +\newenvironment{definition}{% +\italicenvfalse +\begin{defi}}{\end{defi}\italicenvtrue} +% +\def\@begintheorem#1#2{\trivlist \item[\hskip 10pt\hskip + \labelsep{\sc{#1}\hskip 5pt\relax #2.}] \itshape} +% +\def\@opargbegintheorem#1#2#3{\trivlist + \item[\hskip 10pt \hskip +\labelsep{\sc{#1}\savebox\@tempboxa{\sc{#3}}\ifdim + \wd\@tempboxa>\z@ \hskip 5pt\relax \sc{#2} \box\@tempboxa\fi.}] +\itshape} +% +\newif\if@qeded\global\@qededfalse +\def\proof{\global\@qededfalse\@ifnextchar[{\@xproof}{\@proof}} +\def\endproof{\if@qeded\else\qed\fi\endtrivlist} +\def\qed{\unskip\kern 10pt{\unitlength1pt\linethickness{.4pt}\framebox(5,5){}} +\global\@qededtrue} +\def\@proof{\trivlist \item[\hskip 10pt\hskip + \labelsep{\sc Proof.}]\ignorespaces} +\def\@xproof[#1]{\trivlist \item[\hskip 10pt\hskip + \labelsep{\sc Proof #1.}]\ignorespaces} +% +\def\newdef#1#2{\expandafter\@ifdefinable\csname #1\endcsname +{\@definecounter{#1}\expandafter\xdef\csname 
+the#1\endcsname{\@thmcounter{#1}}\global + \@namedef{#1}{\@defthm{#1}{#2}}\global + \@namedef{end#1}{\@endtheorem}}} +\def\@defthm#1#2{\refstepcounter + {#1}\@ifnextchar[{\@ydefthm{#1}{#2}}{\@xdefthm{#1}{#2}}} +\def\@xdefthm#1#2{\@begindef{#2}{\csname the#1\endcsname}\ignorespaces} +\def\@ydefthm#1#2[#3]{\trivlist \item[\hskip 10pt\hskip + \labelsep{\it #2\savebox\@tempboxa{#3}\ifdim + \wd\@tempboxa>\z@ \ \box\@tempboxa\fi.}]\ignorespaces} +\def\@begindef#1#2{\trivlist \item[\hskip 10pt\hskip + \labelsep{\it #1\ \rm #2.}]} +% +\def\theequation{\arabic{equation}} +% +\def\titlepage{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \else \newpage \fi \thispagestyle{empty}\c@page\z@} +\def\endtitlepage{\if@restonecol\twocolumn \else \newpage \fi} +% +\arraycolsep 2.5pt \tabcolsep 6pt \arrayrulewidth .4pt \doublerulesep 2pt +\tabbingsep \labelsep +% +\skip\@mpfootins = \skip\footins +\fboxsep = 3pt \fboxrule = .4pt +% +\newcounter{part} +\newcounter{section} +\newcounter{subsection}[section] +\newcounter{subsubsection}[subsection] +\newcounter{paragraph}[subsubsection] +% +\def\thepart{\Roman{part}} +\def\thesection {\arabic{section}} +\def\thesubsection {\thesection.\arabic{subsection}} +\def\thesubsubsection {\itshape\thesubsection.\arabic{subsubsection}} +\def\theparagraph {\thesubsubsection.\arabic{paragraph}} + +\def\@pnumwidth{1.55em} +\def\@tocrmarg {2.55em} +\def\@dotsep{4.5} +\setcounter{tocdepth}{3} + +\def\tableofcontents{\section*{Contents\@mkboth{CONTENTS}{CONTENTS}} + \@starttoc{toc}} +\def\l@part#1#2{\addpenalty{\@secpenalty} + \addvspace{2.25em plus 1pt} \begingroup + \@tempdima 3em \parindent \z@ \rightskip \@pnumwidth \parfillskip +-\@pnumwidth + {\large \bf \leavevmode #1\hfil \hbox to\@pnumwidth{\hss #2}}\par + \nobreak \endgroup} +\def\l@section#1#2{\addpenalty{\@secpenalty} \addvspace{1.0em plus 1pt} +\@tempdima 1.5em \begingroup + \parindent \z@ \rightskip \@pnumwidth + \parfillskip -\@pnumwidth + \bf \leavevmode #1\hfil \hbox 
to\@pnumwidth{\hss #2}\par + \endgroup} +\def\l@subsection{\@dottedtocline{2}{1.5em}{2.3em}} +\def\l@subsubsection{\@dottedtocline{3}{3.8em}{3.2em}} +\def\listoffigures{\section*{List of Figures\@mkboth + {LIST OF FIGURES}{LIST OF FIGURES}}\@starttoc{lof}} +\def\l@figure{\@dottedtocline{1}{1.5em}{2.3em}} +\def\listoftables{\section*{List of Tables\@mkboth + {LIST OF TABLES}{LIST OF TABLES}}\@starttoc{lot}} +\let\l@table\l@figure +% +\newif\if@restonecol +\def\theindex{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi +\columnseprule \z@ +\columnsep 35pt\twocolumn[\section*{Index}] + \@mkboth{INDEX}{INDEX}\thispagestyle{plain}\parindent\z@ + \parskip\z@ plus .3pt\relax\let\item\@idxitem} +\def\@idxitem{\par\hangindent 40pt} +\def\subitem{\par\hangindent 40pt \hspace*{20pt}} +\def\subsubitem{\par\hangindent 40pt \hspace*{30pt}} +\def\endtheindex{\if@restonecol\onecolumn\else\clearpage\fi} +\def\indexspace{\par \vskip 10pt plus 5pt minus 3pt\relax} +% +\def\footnoterule{\kern-3\p@ + \hrule \@height 0.2\p@ \@width 47\p@ + \kern 2.6\p@ +} + +\long\def\@makefntext#1{\parindent 1em\noindent + $^{\@thefnmark}$#1} +% +\setcounter{topnumber}{3} +\def\topfraction{.99} +\setcounter{bottomnumber}{1} +\def\bottomfraction{.5} +\setcounter{totalnumber}{3} +\def\textfraction{.01} +\def\floatpagefraction{.85} +\setcounter{dbltopnumber}{2} +\def\dbltopfraction{.95} +\def\dblfloatpagefraction{.96} +% +\long\def\@makecaption#1#2{\vskip 1pc \setbox\@tempboxa\hbox{#1.\hskip +1em\relax #2} + \ifdim \wd\@tempboxa >\hsize #1. 
#2\par \else \hbox +to\hsize{\hfil\box\@tempboxa\hfil} + \fi} + +\def\nocaption{\refstepcounter\@captype \par + \vskip 1pc \hbox to\hsize{\hfil \footnotesize Figure \thefigure + \hfil}} +% +\newcounter{figure} +\def\thefigure{\@arabic\c@figure} +\def\fps@figure{tbp} +\def\ftype@figure{1} +\def\ext@figure{lof} +\def\fnum@figure{Fig.\ \thefigure}% +\def\figure{\let\normalsize\footnotesize\normalsize\@float{figure}} +\let\endfigure\end@float +\@namedef{figure*}{\@dblfloat{figure}} +\@namedef{endfigure*}{\end@dblfloat} +% +\newcounter{table} +\def\thetable{\@arabic\c@table} +\def\fps@table{tbp} +\def\ftype@table{2} +\def\ext@table{lot} +\newlength\belowcaptionskip +\setlength\belowcaptionskip{1\p@} +% +\def\FigName{figure}% +\long\def\@caption#1[#2]#3{\par\begingroup + \@parboxrestore + \normalsize \bf \centering + \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par + \endgroup} +% +% +\newbox\tbbox +\long\def\@makecaption#1#2{% + \ifx\FigName\@captype + \vskip 7.3pt + \setbox\@tempboxa\hbox{\figcaptionfont{#1}.\hskip7.3pt\relax #2\par}% + \ifdim \wd\@tempboxa >\hsize + \figcaptionfont{#1}.\hskip7.3pt\relax #2\par + \else + \centerline{\box\@tempboxa}% + \fi + \else% + \setbox\tbbox=\vbox{\hsize\tempdimen{\tablenumfont #1}\ {\tablecaptionfont #2\par}}% + \setbox\@tempboxa\hbox{\hsize\tempdimen{\tablenumfont #1}\ {\tablecaptionfont #2\par}\vphantom{jgq}}% + \ifdim \wd\@tempboxa >\tempdimen + \centerline{\box\tbbox}% + \else + \centerline{\box\@tempboxa}% + \fi + \vskip\belowcaptionskip + \fi} +% +\def\fnum@table{Table~\thetable.\ } +\def\table{\let\normalsize\footnotesize \normalsize\@float{table}} +\let\endtable\end@float +\@namedef{table*}{\@dblfloat{table}} +\@namedef{endtable*}{\end@dblfloat} +\def\ijcatable#1{\@narrowfig #1\relax + \let\caption\@atcap \let\nocaption\@atnocap + \def\@tmpnf{}\@ifnextchar[{\@xntab}{\@ntab}} +\def\endijcatable{\hbox to \textwidth{\hfil +\vbox{\hsize \@narrowfig +\box\@nfcapbox +{\baselineskip 4pt \hbox{\vrule height .4pt 
width \hsize}} +\vskip -1pt +\box\@nfigbox\vskip -1pt +{\baselineskip 4pt \hbox{\vrule height .4pt width \hsize}}}\hfil} +\end@float} +\def\@xntab[#1]{\def\@tmpnf{[#1]}\@ntab} +\def\@ntab{\expandafter\table\@tmpnf + \setbox\@nfigbox\vbox\bgroup + \hsize \@narrowfig \@parboxrestore} +\def\@atmakecap #1#2{\setbox\@tempboxa\hbox{#1.\hskip 1em\relax #2} + \ifdim \wd\@tempboxa >\hsize \sloppy #1.\hskip 1em\relax #2 \par \else \hbox +to\hsize{\hfil\box\@tempboxa\hfil} + \fi} +\def\@atcap{\par\egroup\refstepcounter\@captype + \@dblarg{\@atcapx\@captype}} +\long\def\@atcapx#1[#2]#3{\setbox\@nfcapbox\vbox {\hsize \wd\@nfigbox + \@parboxrestore + \@atmakecap{\csname fnum@#1\endcsname}{\ignorespaces #3}\par}} +\def\@atnocap{\egroup \refstepcounter\@captype + \setbox\@nfcapbox\vbox {\hsize \wd\@nfigbox + \hbox to\hsize{\hfil \footnotesize Table \thetable\hfil}}} +% +\newdimen\tabledim +% +\long\def\tbl#1#2{% + \setbox\tempbox\hbox{\tablefont #2}% + \tabledim\hsize\advance\tabledim by -\wd\tempbox + \tempdimen\wd\tempbox + \global\divide\tabledim\tw@ + \caption{#1} + \centerline{\box\tempbox} + }% +% +\newenvironment{tabnote}{% +\par%\addvspace{-1pt} +\tabnotefont +\@ifnextchar[{\@tabnote}{\@tabnote[]}}{% +\par} +\def\@tabnote[#1]{\def\@Tempa{#1}\leftskip\tabledim\rightskip\leftskip\ifx\@Tempa\@empty\else{\it #1:}\ \fi\ignorespaces} +% +\def\tabnoteentry#1#2{\parindent0pt\par\@hangfrom{#1}{#2}} +\def\Note#1#2{\parindent0pt\par\hangindent3.7pt{\it #1}\ #2} +% + +\def\Hline{% + \noalign{\ifnum0=`}\fi\hrule \@height .5pt \futurelet + \@tempa\@xhline} +% +\def\narrowfig#1{\@narrowfig #1\relax + \let\caption\@nfcap \let\nocaption\@nfnocap + \def\@tmpnf{}\@ifnextchar[{\@xnfig}{\@nfig}} +\def\endnarrowfig{\hbox to \textwidth{\if@nfeven + \box\@nfcapbox\hfil\box\@nfigbox + \else \box\@nfigbox\hfil\box\@nfcapbox\fi}\end@float} +\def\@xnfig[#1]{\def\@tmpnf{[#1]}\@nfig} +\def\@nfig{\expandafter\figure\@tmpnf + \setbox\@nfigbox\vbox\bgroup + \hsize \@narrowfig \@parboxrestore} 
+\def\@nfmakecap #1#2{\setbox\@tempboxa\hbox{#1.\hskip 1em\relax #2} + \ifdim \wd\@tempboxa >\hsize \sloppy #1.\hskip 1em\relax #2 \par \else \hbox +to\hsize{\if@nfeven\else\hfil\fi\box\@tempboxa\if@nfeven\hfil\fi} + \fi} +\def\@nfcap{\par\egroup\refstepcounter\@captype + \@dblarg{\@nfcapx\@captype}} +\long\def\@nfcapx#1[#2]#3{\@seteven + \setbox\@nfcapbox\vbox to \ht\@nfigbox + {\hsize \textwidth \advance\hsize -2pc \advance\hsize -\wd\@nfigbox + \@parboxrestore + \vfil + \@nfmakecap{\csname fnum@#1\endcsname}{\ignorespaces #3}\par + \vfil}} +\def\@nfnocap{\egroup \refstepcounter\@captype \@seteven + \setbox\@nfcapbox\vbox to \ht\@nfigbox + {\hsize \textwidth \advance\hsize -2pc \advance\hsize -\wd\@nfigbox + \@parboxrestore + \vfil + \hbox to\hsize{\if@nfeven\else\hfil\fi + \footnotesize Figure \thefigure + \if@nfeven\hfil\fi} + \vfil}} +\def\@seteven{\@nfeventrue + \@ifundefined{r@@nf\thefigure}{}{% + \edef\@tmpnf{\csname r@@nf\thefigure\endcsname}% + \edef\@tmpnf{\expandafter\@getpagenum\@tmpnf}% + \ifodd\@tmpnf\relax\@nfevenfalse\fi}% +\label{@nf\thefigure}\edef\@tmpnfx{\if@nfeven e\else o\fi} +\edef\@tmpnf{\write\@unused {\noexpand\ifodd \noexpand\c@page + \noexpand\if \@tmpnfx e\noexpand\@nfmsg{\thefigure} \noexpand\fi + \noexpand\else + \noexpand\if \@tmpnfx o\noexpand\@nfmsg{\thefigure}\noexpand\fi + \noexpand\fi }}\@tmpnf} +\def\@nfmsg#1{Bad narrowfig: Figure #1 on page \thepage} + +\newdimen\@narrowfig +\newbox\@nfigbox +\newbox\@nfcapbox +\newif\if@nfeven + + +\def\maketitle{% + \thispagestyle{titlepage}% + \newpage + \global\@topnum\z@ + \twocolumn[\@maketitle]% + \let\maketitle\relax + \global\let\@sponsors\@empty +} +% +\def\@maketitle{\newpage \thispagestyle{titlepage}\par + \begingroup \lineskip = \z@\null + \vspace{-1.75em} + \begin{picture}(5,5) + \includegraphics[width=1in]{logojuliacon.pdf} + \end{picture} + \vspace{1.75em} + \vskip -7pt\relax %-18.5pt + \parindent\z@ \LARGE {\centering \hyphenpenalty\@M + {\titlefont \@title} \par + 
\global\firstfoot %aiellom + \global\runningfoot %aiellom +} +\label{@firstpg} +{ +\begin{center}% + \vskip 0.1em% + {\large + \lineskip .75em% + \begin{tabular}[t]{c}% + \@author + \end{tabular}\par}% + \vskip 1.5em% + \end{center}\par + \@thanks +} + \vskip 23pt\relax + \endgroup + } +\newbox\@abstract +\newbox\@terms +\newbox\@keywords + + +% +\newenvironment{abstract} +{\section*{ABSTRACT}\par\fontsize{10}{12}\indent\ignorespaces} +{ + { \ifvoid\@terms\else\box\@terms\fi + \@keywords \@juliaconformat\empty}\vskip6pt} +% +\def\terms#1{\setbox\@terms=\vbox{\hsize20pc% + \footnotesize% + \parindent 0pt \noindent + { \section*{General Terms}} \ignorespaces #1{\vspace{-0.75em}}}} +\def\keywords#1{\gdef\@keywords{\hsize20pc% + \parindent 0pt\noindent\ignorespaces% + {{\vspace{-0.75em}} \section*{Keywords}} \ignorespaces #1{\vspace{1em}}}} +%} + +\def\category#1#2#3{\@ifnextchar + [{\@category{#1}{#2}{#3}}{\@xcategory{#1}{#2}{#3}}} +\def\@category#1#2#3[#4]{\edef\@tempa{\ifx \@categories\@empty + \else ; \fi}{\def\protect{\noexpand\protect + \noexpand}\def\and{\noexpand\and}\xdef\@categories{\@categories\@tempa #1 +[{\bf #2}]: + #3\kern\z@---\hskip\z@{\it #4}}}} +\def\@xcategory#1#2#3{\edef\@tempa{\ifx \@categories\@empty \else ; +\fi}{\def\protect{\noexpand\protect\noexpand}\def\and{\noexpand + \and}\xdef\@categories{\@categories\@tempa #1 [{\bf #2}]: #3}}} +\def\@categories{} + +\newenvironment{ackslike}[1] + {\par \footnotesize + \@ucheadfalse + \@startsection{subsection}{2}{\z@}{-16pt plus -2pt minus -1pt}{2pt}{\sf}* + {\uppercase{#1}}\par\normalsize + } + {\par} +\newenvironment{acks}{\begin{ackslike}{ \normalsize\rm\bf Acknowledgments}}{\end{ackslike}} +% + +\newcommand\headingtable{% + \begin{tabular}[b]{l} {\@journalName}\end{tabular}} +\markright{\protect\headingtable} +\mark{{}{}} +\def\bull{{\fontsize{7}{7}\selectfont\raise1.6pt\hbox{$\bullet$}}} +\def\ps@myheadings{\let\@mkboth\@gobbletwo +\def\@oddhead{ \fontsize{9}{12} \rm {{\itshape\headingtable}\hfill 
\@volume(\@issue), \@year}} +\def\@oddfoot{\fontsize{9}{12}\@runningfoot} +\def\@evenhead{ \fontsize{9}{12} \rm {\itshape\headingtable}\hfill \@volume(\@issue), \@year} +\def\@evenfoot{\fontsize{9}{12}\@runningfoot} +\def\sectionmark##1{}\def\subsectionmark##1{}} +% +\def\@runningfoot{} +\def\runningfoot{\def\@runningfoot{ \fontsize{9}{12} \thepage}} +\def\@firstfoot{} +\def\firstfoot{\def\@firstfoot{\fontsize{9}{12} \thepage}} +\def\ps@titlepage{\let\@mkboth\@gobbletwo +\def\@oddhead{}\def\@oddfoot{\fontsize{9}{12}\@firstfoot}\def\@evenhead{}\def\@evenfoot{\fontsize{9}{12}\@firstfoot}} +% +\def\today{\ifcase\month\or + January\or February\or March\or April\or May\or June\or + July\or August\or September\or October\or November\or December\fi + \space\number\day, \number\year} +\def\@marrayclassiv{\@addtopreamble{$\displaystyle \@nextchar$}} +\def\@marrayclassz{\ifcase \@lastchclass \@acolampacol \or \@ampacol \or + \or \or \@addamp \or + \@acolampacol \or \@firstampfalse \@acol \fi +\edef\@preamble{\@preamble + \ifcase \@chnum + \hfil$\relax\displaystyle\@sharp$\hfil \or $\relax\displaystyle\@sharp$\hfil + \or \hfil$\relax\displaystyle\@sharp$\fi}} +\def\marray{\arraycolsep 2.5pt\let\@acol\@arrayacol \let\@classz\@marrayclassz + \let\@classiv\@marrayclassiv \let\\\@arraycr\def\@halignto{}\@tabarray} +\def\endmarray{\crcr\egroup\egroup} +% +\ps@myheadings \pagenumbering{arabic} \onecolumn +% +\setlength \labelsep {.5em} +\setlength \labelwidth{\leftmargini} +\addtolength\labelwidth{-\labelsep} +\@beginparpenalty -\@lowpenalty +\@endparpenalty -\@lowpenalty +\@itempenalty -\@lowpenalty +% +\def\newdef#1{\@ifnextchar[{\@xnewdef{#1}}{\@ynewdef{#1}}} +\def\@xnewdef#1[#2]#3{\newtheorem{italic@#1}[#2]{{\em #3}}\@newdef{#1}} +\def\@ynewdef#1#2{\@ifnextchar[{\@xynewdef{#1}{#2}}{\@yynewdef{#1}{#2}}} +\def\@xynewdef#1#2[#3]{\newtheorem{italic@#1}{{\em #2}}[#3]\@newdef{#1}} +\def\@yynewdef#1#2{\newtheorem{italic@#1}{{\em #2}}\@newdef{#1}} 
+\def\@newdef#1{\newenvironment{#1}{\@ifnextchar[{\@xstartdef{#1}}{\@ystartdef{#1}}}{\end{italic@#1}}} +\def\@xstartdef#1[#2]{\begin{italic@#1}[{\em #2}]\rm} +\def\@ystartdef#1{\begin{italic@#1}\rm} +% +%\def\@oddfoot{\hbox{}\hfill\@runningfoot \thepage} +%\def\@evenfoot{\@runningfoot\hfill\hbox{} \thepage } +%\def\firstfootsize{\@setsize\firstfootsize{9pt}\vipt\@vipt} +\def\ps@titlepage{\let\@mkboth\@gobbletwo +\def\@oddhead{\fontsize{9}{12} \rm {\hskip 19pt\itshape}}\def\@oddfoot{\hbox{}\hfill\fontsize{9}{12}\@firstfoot}% +\def\@evenhead{}\def\@evenfoot{\firstfootsize\@firstfoot\hfill\hbox{}}} +% +\def\@listI{\leftmargin\leftmargini + \labelwidth\leftmargini\advance\labelwidth-\labelsep + \parsep 0pt plus 1pt + \topsep 6pt plus 2pt minus 2pt + \itemsep 2pt plus 1pt minus .5pt} +\let\@listi\@listI +\@listi +% +\def\longenum{\ifnum \@enumdepth >3 \@toodeep\else + \advance\@enumdepth \@ne + \edef\@enumctr{enum\romannumeral\the\@enumdepth}\list + {\csname label\@enumctr\endcsname}{\usecounter + {\@enumctr}\labelwidth\z@\leftmargin\z@ + \itemindent\parindent \advance\itemindent\labelsep}\fi} +% +\def\ack{ \par \footnotesize +\@ucheadfalse +\@startsection{subsection}{2}{\z@}{-16pt plus -2pt minus + -1pt}{2pt}{\sf}*{ACKNOWLEDGMENT}\par\normalsize +} +\def\endack{\par} + +% provide both spellings of Acknowledgment(s) +\let\acknowledgments\acks +\let\endacknowledgments\endacks +\let\acknowledgment\ack +\let\endacknowledgment\endack +% +\newcommand{\bibemph}[1]{{\em#1}} +\newcommand{\bibemphic}[1]{{\em#1\/}} +\newcommand{\bibsc}[1]{{\sc#1}} + +\newcommand\bibyear[2]{% + \unskip{\hskip8pt}\ignorespaces#1\unskip + \if..#2{\hskip6pt}\else {\hskip8pt}#2 \fi +} +% +\let\l@table\l@figure +\newdimen\bibindent +\setlength\bibindent{1.5em} +\newenvironment{thebibliography}[1] + {\section{\refname}%% + \list{\@biblabel{\@arabic\c@enumiv}}% + {\settowidth\labelwidth{\@biblabel{#1}}% + \leftmargin\labelwidth + \advance\leftmargin\labelsep + \@openbib@code + \usecounter{enumiv}% + 
\let\p@enumiv\@empty + \renewcommand\theenumiv{\@arabic\c@enumiv}}% + \sloppy + \clubpenalty4000 + \@clubpenalty \clubpenalty + \widowpenalty4000% + \sfcode`\.\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} +\newcommand\newblock{\hskip .11em\@plus.33em\@minus.07em} +\let\@openbib@code\@empty + +% +\DeclareOldFontCommand{\rm}{\normalfont\rmfamily}{\mathrm} +\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf} +\DeclareOldFontCommand{\tt}{\normalfont\ttfamily}{\mathtt} +\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf} +\DeclareOldFontCommand{\it}{\normalfont\itshape}{\mathit} +\DeclareOldFontCommand{\sl}{\normalfont\slshape}{\@nomath\sl} +\DeclareOldFontCommand{\sc}{\normalfont\scshape}{\@nomath\sc} +\DeclareRobustCommand*\cal{\@fontswitch\relax\mathcal} +\DeclareRobustCommand*\mit{\@fontswitch\relax\mathnormal} +% +\def\@juliaconformat{} +\def\juliaconformat#1{\gdef\@juliaconformat{\noindent{\bf JuliaCon Reference Format:}\\[2pt] #1\par}} +% +\def\received#1#2{% + \par% + \tiny + \addvspace{12\p@}% + \parindent\z@% +\small\scriptsize{Received\ #1;\ accepted\ #2}% +\par% +} + +% +\sloppy +\clubpenalty10000 +\widowpenalty10000% +\@lowpenalty 51 +\@medpenalty 151 +\@highpenalty 301 +% +\@beginparpenalty -\@lowpenalty +\@endparpenalty -\@lowpenalty +\@itempenalty -\@lowpenalty + +\voffset-5pc +\hoffset-6.03pc + +\usepackage{times} +%% \usepackage[mtbold]{mathtime} +\usepackage{bm} +\usepackage{graphicx}% Include figure files +\usepackage{hyperref} +%%\usepackage{microtype} +\renewcommand{\ttdefault}{cmtt} + +\usepackage{jlcode} + +\usepackage{authblk} + +\endinput + +% end of juliacon.cls diff --git a/paper/juliagraphs.png b/paper/juliagraphs.png new file mode 100644 index 0000000..99ca30c Binary files /dev/null and b/paper/juliagraphs.png differ diff --git a/paper/logojuliacon.pdf b/paper/logojuliacon.pdf new file mode 100644 index 0000000..744eaee Binary files /dev/null and b/paper/logojuliacon.pdf differ 
diff --git a/paper/paper.bbl b/paper/paper.bbl new file mode 100644 index 0000000..ef96e1a --- /dev/null +++ b/paper/paper.bbl @@ -0,0 +1,32 @@ +\begin{thebibliography}{1} + +\bibitem{besard2018effective} +T.~Besard, C.~Foket, and B.~De~Sutter. +Effective extensible programming: unleashing {J}ulia on {GPUs}. +{\em IEEE Transactions on Parallel and Distributed Systems}, 30(4):827--841, + 2018. + +\bibitem{byrne2021mpi} +S.~Byrne, L.~C. Wilcox, and V.~Churavy. +{MPI}.jl: {J}ulia bindings for the {M}essage {P}assing {I}nterface. +In {\em Proceedings of the JuliaCon Conferences}, volume~1, page~68, 2021. +\url{https://github.com/JuliaParallel/MPI.jl}. + +\bibitem{parallelstencil2022} +S.~Omlin and L.~R{\"{a}}ss. +{H}igh-performance x{PU} {S}tencil {C}omputations in {J}ulia. +{\em Proc. JuliaCon Conf.}, ?:2, 2022. + +\bibitem{pasc21} +S.~Omlin, L.~R{\"{a}}ss, N.~Keepfer, G.~Kwasniewski, B.~Malvoisin, and Y.~Y. + Podladchikov. +{Solving Nonlinear Partial Differential Equations on GPU Supercomputers Using + Julia}. +PASC21 conference, 2021. + +\bibitem{amdgpu_jl} +J.~Samaroo, T.~Besard, V.~Churavy, D.~Lin, and other contributors. +{AMDGPU.jl}: {AMD} {GPU} ({ROC}m) programming in {J}ulia. +\url{https://github.com/JuliaGPU/AMDGPU.jl}, 2013. + +\end{thebibliography} diff --git a/paper/paper.pdf b/paper/paper.pdf new file mode 100644 index 0000000..6b46728 Binary files /dev/null and b/paper/paper.pdf differ diff --git a/paper/paper.tex b/paper/paper.tex new file mode 100644 index 0000000..abf909c --- /dev/null +++ b/paper/paper.tex @@ -0,0 +1,117 @@ + +% JuliaCon proceedings template +\documentclass{juliacon} +\setcounter{page}{1} + +\newcommand{\ts}{\textsuperscript} +\hypersetup{hidelinks} + +\begin{document} + +\input{header} + +\maketitle + +\begin{abstract} + +We present a straightforward approach for distributed parallelization of stencil-based xPU applications on a regular staggered grid, which is instantiated in the package \texttt{ImplicitGlobalGrid.jl}. 
The approach makes it possible to leverage remote direct memory access and enables close-to-ideal weak scaling of real-world applications on thousands of GPUs. The communication costs can be easily hidden behind computation. + +\end{abstract} + +\section{Introduction} + In light of the high pace of hardware evolution since the dawn of the 21\ts{st} century, the HPC community has identified the 3 ``P''s --- (scalable) Performance, (performance) Portability and Productivity --- as fundamental requirements for today's and tomorrow's software development. The approach and package development presented in this paper respond to each of the 3 ``P''s. We present an approach for automatic and architecture-agnostic distributed parallelization of stencil-based xPU applications on a regular staggered grid (in this paper, xPU refers to both GPUs and CPUs). + +\section{Approach} +The approach presented here renders the distributed parallelization of stencil-based xPU applications on a regular staggered grid almost trivial. We have instantiated the approach in the Julia package \texttt{ImplicitGlobalGrid.jl}. A highlight in the design of ImplicitGlobalGrid is the automatic implicit creation of the global computational grid based on the number of processes the application is run with (and based on the process topology, which can be explicitly chosen by the user or automatically defined). As a consequence, the user only needs to write code to solve their problem on one xPU (local grid); then, as few as three functions can be enough to transform a single-xPU application into a massively scaling multi-xPU application: a first function creates the implicit global staggered grid, a second function performs a halo update on it, and a third function finalizes the global grid.
ImplicitGlobalGrid does not have any requirements on the packages used to obtain good per xPU performance (shared memory parallelization and optimisations can, e.g., be performed with ParallelStencil \cite{parallelstencil2022} or any other package that might suit this task). Fig.~\ref{fig:code} shows a stencil-based 3-D heat diffusion xPU solver, where distributed parallelization is achieved with the three ImplicitGlobalGrid functions mentioned (lines 23, 38 and 43) plus some additional functions to query the size of the global grid (lines 24-26; in this example ParallelStencil \cite{parallelstencil2022} is used to obtain high per xPU performance). + +\begin{figure}[t] +\begin{lstlisting}[language = Julia, numbers=left, numberstyle=\tiny\color{gray}] +using ImplicitGlobalGrid +using ParallelStencil +using ParallelStencil.FiniteDifferences3D +@init_parallel_stencil(CUDA, Float64, 3) + +@parallel function step!(T2,T,Ci,lam,dt,dx,dy,dz) + @inn(T2) = @inn(T) + dt*( + lam*@inn(Ci)*(@d2_xi(T)/dx^2 + + @d2_yi(T)/dy^2 + + @d2_zi(T)/dz^2 ) ) + return +end + +function diffusion3D() + # Physics + lam = 1.0 #Thermal conductivity + c0 = 2.0 #Heat capacity + lx=ly=lz = 1.0 #Domain length x|y|z + + # Numerics + nx=ny=nz = 512 #Nb gridpoints x|y|z + nt = 100 #Nb time steps + me, = init_global_grid(nx, ny, nz) + dx = lx/(nx_g()-1) #Space step in x + dy = ly/(ny_g()-1) #Space step in y + dz = lz/(nz_g()-1) #Space step in z + + # Initial conditions + T = @ones(nx,ny,nz).*1.7 #Temperature + T2 = copy(T) #Temperature (2nd) + Ci = @ones(nx,ny,nz)./c0 #1/Heat capacity + + # Time loop + dt = min(dx^2,dy^2,dz^2)/lam/maximum(Ci)/6.1 + for it = 1:nt + @hide_communication (16, 2, 2) begin + @parallel step!(T2,T,Ci,lam,dt,dx,dy,dz) + update_halo!(T2) + end + T, T2 = T2, T + end + + finalize_global_grid() +end + +diffusion3D() + +\end{lstlisting} + + \caption{Stencil-based 3-D heat diffusion xPU solver implemented using ImplicitGlobalGrid (and ParallelStencil).} + \label{fig:code} +\end{figure} 
+ + +ImplicitGlobalGrid relies on \texttt{MPI.jl} \cite{byrne2021mpi} to perform halo updates close to hardware limits. For GPU applications, ImplicitGlobalGrid leverages remote direct memory access when CUDA- or ROCm-aware MPI is available and, otherwise, uses highly optimized asynchronous data transfer routines to move the data through the hosts. In addition, pipelining is applied on all stages of the data transfers, improving the effective throughput between GPU and GPU. Low-level management of memory, CUDA streams, ROCm queues and signals permits efficient reuse of send and receive buffers and streams or queues and signals throughout an application without putting the burden of their management on the user. Moreover, all data transfers are performed on non-blocking high-priority streams or queues, allowing the communication to be overlapped optimally with computation. \texttt{ParallelStencil.jl}, e.g., can do so with a simple macro call (Fig.~\ref{fig:code}, line 36). + +Asymmetrical halos that could result from staggered grids and would represent a considerable complexity are fully circumvented in our approach: a field will only have halos in a given dimension if the corresponding overlap between the local fields is at least two cells wide; no halos are created if the overlap is only one cell wide (redundant computation is done instead). + +ImplicitGlobalGrid is fully interoperable with \texttt{MPI.jl}. By default, it creates a Cartesian MPI communicator, which can be easily retrieved together with other MPI variables. Alternatively, an MPI communicator can be passed to ImplicitGlobalGrid for usage. As a result, ImplicitGlobalGrid's functionality can be seamlessly extended using \texttt{MPI.jl}. + +The modular design of ImplicitGlobalGrid, which heavily relies on multiple dispatch, enables adding support for other hardware with little development effort (including new kinds of accelerators as soon as they become programmable with Julia). 
As a result, support for AMD GPUs using the recently matured \texttt{AMDGPU.jl} package \cite{amdgpu_jl} could be implemented shortly after the package matured (Nvidia GPUs are supported using \texttt{CUDA.jl} \cite{besard2018effective}). ImplicitGlobalGrid currently supports distributed parallelization for CUDA- and ROCm-capable GPUs as well as for CPUs. + +\section{Results} +Here we report the scaling achieved with the 3-D heat diffusion xPU solver (Fig.~\ref{fig:code}) on up to 2197 Nvidia Tesla P100 GPUs on the Piz Daint Supercomputer at the Swiss National Supercomputing Centre (Fig.~\ref{fig:weak_scaling}; $17^3$, i.e., 2197 nodes is the biggest cubic node topology that can be submitted in the normal queue of Piz Daint). We observe a parallel efficiency of 93\% on 2197 GPUs. +Moreover, we have employed ImplicitGlobalGrid (and ParallelStencil) for the parallelization of a solver for nonlinear 3-D poro-visco-elastic two-phase flow and have also conducted a weak scaling experiment on Piz Daint (Fig.~\ref{fig:weak_scaling_realworld}). We observe a parallel efficiency of over 95\% on up to 1024 GPUs. As a performance reference, the solver implemented in Julia achieved 90\% of the per-node performance of the respective original solver written in CUDA C using MPI. + +\begin{figure}[t] + \centerline{\includegraphics[width=8cm]{julia_gpu_par_eff.png}} + \caption{Parallel weak scaling of the 3-D heat diffusion solver (Fig.~\ref{fig:code}) from $1$ to $2197$ ($17^3$) Nvidia P100 GPUs on Piz Daint at CSCS. The blue surface visualizes the 95\% confidence interval of the reported medians (20 samples). 
The raw data and plotting script are available in \url{https://github.com/omlins/ImplicitGlobalGrid.jl/tree/master/paper}.} + \label{fig:weak_scaling} +\end{figure} + +\begin{figure}[t] + \centerline{\includegraphics[width=8cm]{julia_c_gpu_par_eff_lin.png}} + \caption{Parallel weak scaling of the nonlinear 3-D poro-visco-elastic two-phase flow solver from $1$ to $1024$ Nvidia P100 GPUs on Piz Daint at CSCS (problem size per GPU is $382^\mathrm{3}$). The blue and orange surfaces visualize the 95\% confidence intervals of the reported medians (20 samples). For reference, the Julia solver achieved 90\% of the per-node performance of the corresponding CUDA C solver. The raw data and plotting script are available in \url{https://github.com/omlins/ImplicitGlobalGrid.jl/tree/master/paper}.} + \label{fig:weak_scaling_realworld} +\end{figure} + +\section{Conclusions} +We have shown that ImplicitGlobalGrid enables scalable performance, performance portability and productivity and addresses the 3 ``P''s in all of its aspects. In addition, we have demonstrated the effectiveness and wide applicability of our approach within geosciences. Our approach is, of course, in no way limited to geosciences, as distributed parallelization based on halo updates is employed in many scientific disciplines. We illustrated this in a recent contribution, where we showcased a quantum fluid dynamics solver using the nonlinear Gross-Pitaevskii equation implemented with ImplicitGlobalGrid (and ParallelStencil) \cite{pasc21}. + +\section{Acknowledgments} +We would like to thank Julian Samaroo (MIT) for his pro-active support for enabling AMDGPU in ImplicitGlobalGrid. This work was supported by a grant from the Swiss National Supercomputing Centre (CSCS) under project ID c23 through the Platform for Advanced Scientific Computing (PASC) program. 
# Metadata generator for the JuliaCon proceedings template.
# DO NOT EDIT
#
# Reads paper.yml from the current directory and writes three LaTeX
# fragments next to it:
#   header.tex      - \title, \author, \affil, \keywords and \hypersetup
#   journal_dat.tex - journal name, volume, issue and year macros
#   bib.tex         - bibliography style and bibliography file
#
# Issue, volume and year are taken from the JLCON_ISSUE, JLCON_VOLUME and
# JLCON_YEAR environment variables when set; otherwise defaults are used.

require 'yaml'

metadata = YAML.load_file('paper.yml')

# Abort early when a key that the fragments below rely on is missing.
%w[title authors affiliations keywords bibliography].each do |required_key|
    raise "Key #{required_key} not present in metadata" unless metadata.key?(required_key)
end

# ENV variables or default for issue/volume/year
issue        = ENV.fetch("JLCON_ISSUE", 1)
volume       = ENV.fetch("JLCON_VOLUME", 1)
year         = ENV.fetch("JLCON_YEAR", 2020)
journal_name = "Proceedings of JuliaCon" # hard-coded for now

# First line(s) of every generated file.
banner = "% **************GENERATED FILE, DO NOT EDIT**************\n\n"

paper_title = metadata["title"]
author_rows = metadata["authors"]
keywords    = metadata["keywords"]

File.open('header.tex', 'w') do |out|
    out << banner
    out << "\\title{#{paper_title}}\n\n"

    # One \author line per author, tagged with its affiliation index/indices.
    author_rows.each do |author|
        out << "\\author[#{author["affiliation"]}]{#{author["name"]}}\n"
    end

    # One \affil line per affiliation.
    metadata["affiliations"].each do |affiliation|
        out << "\\affil[#{affiliation["index"]}]{#{affiliation["name"]}}\n"
    end

    # Comma-separated keyword list (no trailing separator).
    out << "\n\\keywords{"
    out << keywords.map(&:to_s).join(', ')
    out << "}\n\n"

    # hypersetup
    pdf_authors  = author_rows.map { |author| author['name'] }.join(', ')
    pdf_keywords = keywords.join(', ')
    out << "\\hypersetup{\n"
    out << "pdftitle = {#{paper_title}},\n"
    out << "pdfsubject = {JuliaCon 2019 Proceedings},\n"
    out << "pdfauthor = {#{pdf_authors}},\n"
    out << "pdfkeywords = {#{pdf_keywords}},\n"
    out << "}\n\n"
end

File.open('journal_dat.tex', 'w') do |out|
    out << banner
    out << "\\def\\@journalName{#{journal_name}}\n"
    out << "\\def\\@volume{#{volume}}\n"
    out << "\\def\\@issue{#{issue}}\n"
    out << "\\def\\@year{#{year}}\n"
end

File.open('bib.tex', 'w') do |out|
    out << banner
    out << "\\bibliographystyle{juliacon}\n"
    out << "\\bibliography{#{metadata["bibliography"]}}\n"
end
Kibardin, Vladimir and Portnoy, Andrey and Dietiker, Jean Francois and Syamlal, Madhava and James, Michael}, + booktitle={SC20: International Conference for High Performance Computing, Networking, Storage and Analysis}, + title={Fast Stencil-Code Computation on a Wafer-Scale Processor}, + year={2020}, + volume={}, + number={}, + pages={1-14}, + doi={10.1109/SC41405.2020.00062} +} + +@article{parallelstencil2022, + author={Omlin, S. and R{\"{a}}ss, L.}, + doi = {???}, + journal = {Proc. JuliaCon Conf.}, + title = {{H}igh-performance x{PU} {S}tencil {C}omputations in {J}ulia}, + volume = {?}, + pages = {2}, + year = {2022}, +} + +@inproceedings{byrne2021mpi, + title={{MPI}.jl: {J}ulia bindings for the {M}essage {P}assing {I}nterface}, + author={Byrne, S. and Wilcox, L. C. and Churavy, V.}, + booktitle={Proceedings of the JuliaCon Conferences}, + volume={1}, + pages={68}, + year={2021}, + doi={10.21105/jcon.00068}, + note={\url{https://github.com/JuliaParallel/MPI.jl}} +} + +@article{besard2018effective, + title={Effective extensible programming: unleashing {J}ulia on {GPUs}}, + author={Besard, T. and Foket, C. and De Sutter, B.}, + journal={IEEE Transactions on Parallel and Distributed Systems}, + volume={30}, + number={4}, + pages={827--841}, + year={2018}, + publisher={IEEE} +} + +@misc{amdgpu_jl, + title={{AMDGPU.jl}: {AMD} {GPU} ({ROC}m) programming in {J}ulia }, + author={Samaroo, J. and Besard, T. and Churavy, V. and Lin, D. and other contributors}, + year={2013}, + howpublished={\url{https://github.com/JuliaGPU/AMDGPU.jl}} +} + +@online{juliacon2020scaling, + title={{Solving Nonlinear Multi-Physics on GPU Supercomputers with Julia}}, + author={Omlin, S and R{\"{a}}ss, L and Kwasniewski, G and Malvoisin, B and Podladchikov, Y Y}, + howpublished={\url{https://youtu.be/vPsfZUqI4_0}. 
JuliaCon conference}, + location={virtual}, + year={2020}, +} + +@online{pasc21, + title={{Solving Nonlinear Partial Differential Equations on GPU Supercomputers Using Julia}}, + author={Omlin, S. and R{\"{a}}ss, L. and Keepfer, N. and Kwasniewski, G. and Malvoisin, B. and Podladchikov, Y. Y.}, + howpublished={PASC21 conference}, + location={virtual}, + year={2021}, +} \ No newline at end of file