diff --git a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt index 70bd38c8f8..720e55f711 100644 --- a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_d_inl0_hrd0.txt @@ -68,124 +68,124 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg' -DATE: 2022-01-25_14:16:42 +DATE: 2022-01-25_14:20:58 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.856312e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.117054e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.137397e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.825223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.116278e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136496e+07 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.545995 sec - 122,033,876 cycles:u # 0.161 GHz - 124,887,549 instructions:u # 1.02 insn per cycle - 0.827595812 seconds time elapsed +TOTAL : 0.678560 sec + 161,415,403 cycles:u # 0.186 GHz + 156,751,495 instructions:u # 0.97 insn per cycle + 0.978875136 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.110679e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.426272e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.445969e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.153586e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.430525e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.446913e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.528429 sec - 212,492,408 cycles:u # 0.344 GHz - 324,392,180 instructions:u # 1.53 insn per cycle - 0.855244610 seconds time elapsed +TOTAL : 0.829656 sec + 276,039,539 cycles:u # 0.267 GHz + 435,484,575 instructions:u # 1.58 insn per cycle + 1.148005373 seconds time elapsed ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.454644e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.484338e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.484338e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.422586e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.431775e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.431775e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.679076 sec - 1,797,740,736 cycles:u # 2.634 GHz - 5,703,451,553 instructions:u # 3.17 insn per cycle - 0.685914548 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 724) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.819596 sec + 1,855,971,473 cycles:u # 2.367 GHz + 5,501,461,229 instructions:u # 2.96 insn per cycle + 0.827362939 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 2046) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.sse4_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.440269e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.538615e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.538615e+04 ) sec^-1 +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 4.541303e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.574281e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.574281e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.379008 sec - 999,697,554 cycles:u # 2.610 GHz - 2,984,522,633 instructions:u # 2.99 insn per cycle - 0.385703911 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4238) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 0.516400 sec + 1,014,376,023 cycles:u # 2.151 GHz + 2,848,819,366 instructions:u # 2.81 insn per cycle + 0.524245458 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 6725) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.avx2_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.504889e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.864887e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.864887e+04 ) sec^-1 +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 1.026717e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042989e+05 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.202493 sec - 454,040,726 cycles:u # 2.199 GHz - 1,059,443,341 instructions:u # 2.33 insn per cycle - 0.209383271 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3579) (512y: 0) (512z: 0) +TOTAL : 0.299945 sec + 413,359,694 cycles:u # 1.553 GHz + 1,002,695,927 instructions:u # 2.43 insn per cycle + 0.307815465 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512y_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.390164e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828041e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828041e+04 ) sec^-1 +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 1.071714e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088971e+05 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.184338 sec - 412,725,338 cycles:u # 2.191 GHz - 1,002,043,947 instructions:u # 2.43 insn per cycle - 0.191195153 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3424) (512y: 70) (512z: 0) +TOTAL : 0.293746 sec + 397,965,372 cycles:u # 1.535 GHz + 869,220,580 instructions:u # 2.18 insn per cycle + 0.301551761 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4235) (512y: 6) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512z_d_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.956454e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.192969e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.192969e+04 ) sec^-1 +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 7.757476e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.851080e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.851080e+04 ) sec^-1 MeanMatrixElemValue = ( 4.061783e+02 +- 3.760219e+02 ) GeV^-2 -TOTAL : 0.245867 sec - 392,051,485 cycles:u # 1.570 GHz - 554,910,571 instructions:u # 1.42 insn per cycle - 0.252967148 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1243) (512y: 69) (512z: 2828) +TOTAL : 0.352223 sec + 384,676,169 cycles:u # 1.209 GHz + 729,854,481 instructions:u # 1.90 insn per cycle + 0.360061620 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2823) (512y: 10) (512z: 3069) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_f_inl0_hrd0.txt index cd06ea2a66..94809f249e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_manu/log_ggttg_manu_f_inl0_hrd0.txt @@ -68,124 +68,124 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_f_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg' -DATE: 2022-01-25_14:16:56 +DATE: 2022-01-25_14:21:15 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.450091e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.543084e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.681459e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.452236e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.494120e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617743e+07 ) sec^-1 MeanMatrixElemValue = ( 4.063193e+02 +- 3.761164e+02 ) GeV^-2 -TOTAL : 0.565875 sec - 117,013,199 cycles:u # 0.156 GHz - 118,940,137 instructions:u # 1.02 insn per cycle - 0.846805180 seconds time elapsed +TOTAL : 2.027534 sec + 3,223,172,893 cycles:u # 1.532 GHz + 6,391,242,213 instructions:u # 1.98 insn per cycle + 2.318532699 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.163552e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.644314e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.752231e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.210248e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.623085e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.712893e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630098e+02 +- 4.770718e+02 ) GeV^-2 -TOTAL : 0.604354 sec - 156,195,359 cycles:u # 0.190 GHz - 211,065,413 instructions:u # 1.35 insn per cycle - 0.900705504 seconds time elapsed +TOTAL : 0.733941 sec + 219,508,479 cycles:u # 0.236 GHz + 311,785,727 instructions:u # 1.42 insn per cycle + 1.034965305 seconds time elapsed ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.479745e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504331e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.504331e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.062574e+02 +- 3.760985e+02 ) GeV^-2 -TOTAL : 0.669775 sec - 1,777,873,650 cycles:u # 2.640 GHz - 5,764,128,785 instructions:u # 3.24 insn per cycle - 0.676744215 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 642) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.509090e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.519854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.519854e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.062452e+02 +- 3.760918e+02 ) GeV^-2 +TOTAL : 0.791408 sec + 1,796,768,986 cycles:u # 2.372 GHz + 5,508,526,901 instructions:u # 3.07 insn per cycle + 0.798984066 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 1516) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.sse4_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.065292e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.332311e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.332311e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.062575e+02 +- 3.760986e+02 ) GeV^-2 -TOTAL : 0.211716 sec - 555,246,674 cycles:u # 2.577 GHz - 1,646,232,445 instructions:u # 2.96 insn per cycle - 0.218258814 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 4956) (avx2: 0) (512y: 0) (512z: 0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 9.325811e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.483006e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.483006e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.062454e+02 +- 3.760920e+02 ) GeV^-2 +TOTAL : 0.315055 sec + 518,326,539 cycles:u # 1.848 GHz + 1,523,209,891 instructions:u # 2.94 insn per cycle + 0.322644568 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 7200) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.avx2_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.641075e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.752698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.752698e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.062816e+02 +- 3.761081e+02 ) GeV^-2 -TOTAL : 0.108140 sec - 241,711,522 cycles:u # 2.160 GHz - 582,522,043 instructions:u # 2.41 insn per cycle - 0.114670666 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3888) (512y: 0) (512z: 0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 1.424340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.459759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.459759e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.061251e+02 +- 3.759512e+02 ) GeV^-2 +TOTAL : 0.260392 sec + 308,713,278 cycles:u # 1.395 GHz + 575,087,613 instructions:u # 1.86 insn per cycle + 0.268266751 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5754) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512y_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.727358e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.851170e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.851170e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.062816e+02 +- 3.761081e+02 ) GeV^-2 -TOTAL : 0.103278 sec - 224,975,510 cycles:u # 2.098 GHz - 550,656,987 instructions:u # 2.45 insn per cycle - 0.110214895 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3727) (512y: 28) (512z: 0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 2.036344e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.102370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.102370e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.061196e+02 +- 3.759457e+02 ) GeV^-2 +TOTAL : 0.232549 sec + 232,285,409 cycles:u # 1.208 GHz + 496,424,614 instructions:u # 2.14 insn per cycle + 0.241380111 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4834) (512y: 10) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512z_f_inl0_hrd0/check.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.386471e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.465050e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.465050e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.062815e+02 +- 3.761080e+02 ) GeV^-2 -TOTAL : 0.126749 sec - 204,829,357 cycles:u # 1.569 GHz - 318,275,168 instructions:u # 1.55 insn per cycle - 0.133380615 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1667) (512y: 13) (512z: 2941) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 1.185132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.206866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.206866e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.061196e+02 +- 3.759456e+02 ) GeV^-2 +TOTAL : 0.280693 sec + 270,327,383 cycles:u # 1.104 GHz + 491,886,833 instructions:u # 1.82 insn per cycle + 0.288209120 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2861) (512y: 1) (512z: 3255) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt index 2aad719c45..f3dee73fc9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_d_inl0_hrd0.txt @@ -68,124 +68,124 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_d_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg' -DATE: 2022-01-25_14:17:10 +DATE: 2022-01-25_14:21:34 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.773750e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.774323e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.774522e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.771693e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.772244e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.772427e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 1.932934 sec - 1,605,478,982 cycles:u # 0.709 GHz - 2,963,493,572 instructions:u # 1.85 insn per cycle - 2.334498531 seconds time elapsed +TOTAL : 125.786545 sec + 305,471,026,844 cycles:u # 2.527 GHz + 406,666,246,501 instructions:u # 1.33 insn per cycle + 126.263101680 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.177878e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178312e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.178352e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.186291e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186725e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186763e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.589267 sec - 2,965,042,933 cycles:u # 0.756 GHz - 6,271,389,501 instructions:u # 2.12 insn per cycle - 3.991807812 seconds time elapsed +TOTAL : 3.423749 sec + 3,149,860,460 cycles:u # 0.853 GHz + 6,290,083,805 instructions:u # 2.00 insn per cycle + 3.830867417 seconds time elapsed ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.220785e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.221282e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.221282e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.206873e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.207009e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.207009e+01 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 3.773023 sec - 10,035,205,928 cycles:u # 2.658 GHz - 28,435,809,320 instructions:u # 2.83 insn per cycle - 3.779595686 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 9929) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.910361 sec + 9,914,273,386 cycles:u # 2.560 GHz + 28,316,585,099 instructions:u # 2.86 insn per cycle + 3.918085596 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:37700) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.sse4_d_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.317096e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317263e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.317263e+02 ) sec^-1 +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 1.050058e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050087e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050087e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 2.072763 sec - 5,504,103,370 cycles:u # 2.653 GHz - 15,059,556,563 instructions:u # 2.74 insn per cycle - 2.079623927 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:66442) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.738092 sec + 6,920,997,741 cycles:u # 2.565 GHz + 14,832,308,562 instructions:u # 2.14 insn per cycle + 2.748307874 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:286477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.avx2_d_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.609041e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.609673e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.609673e+02 ) sec^-1 +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 2.423584e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.423733e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.423733e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 1.049614 sec - 2,369,463,435 cycles:u # 2.251 GHz - 5,258,946,761 instructions:u # 2.22 insn per cycle - 1.056614540 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:57086) (512y: 0) (512z: 0) +TOTAL : 1.257892 sec + 2,575,094,618 cycles:u # 2.105 GHz + 4,601,372,328 instructions:u # 1.79 insn per cycle + 1.265332602 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:204409) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512y_d_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.884214e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.884983e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.884983e+02 ) sec^-1 +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 2.743878e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.744133e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.744133e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 0.949648 sec - 2,146,355,667 cycles:u # 2.252 GHz - 4,820,661,410 instructions:u # 2.25 insn per cycle - 0.956114085 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:57672) (512y: 51) (512z: 0) +TOTAL : 1.132636 sec + 2,286,349,144 cycles:u # 2.084 GHz + 4,080,399,247 instructions:u # 1.78 insn per cycle + 1.140198850 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:191655) (512y: 6) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512z_d_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+NOVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.935316e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.936110e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.936110e+02 ) sec^-1 +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 2.661415e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.661623e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.661623e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064592e-05 +- 1.952360e-05 ) GeV^-6 -TOTAL : 0.933322 sec - 1,455,371,898 cycles:u # 1.554 GHz - 2,409,725,367 instructions:u # 1.66 insn per cycle - 0.939844139 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 6157) (512y: 49) (512z:52242) +TOTAL : 1.161300 sec + 1,641,741,219 cycles:u # 1.458 GHz + 3,102,064,182 instructions:u # 1.89 insn per cycle + 1.168715795 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8156) (512y: 10) (512z:186456) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. diff --git a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_f_inl0_hrd0.txt index 1a3b426165..f81f0acc7e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_manu/log_ggttggg_manu_f_inl0_hrd0.txt @@ -68,124 +68,124 @@ make[1]: Entering directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for `all.512z_f_inl0_hrd0_hasCurand'. make[1]: Leaving directory `/data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg' -DATE: 2022-01-25_14:18:10 +DATE: 2022-01-25_14:24:49 On itscrd70.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.422566e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.423612e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.423930e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.278778e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.279858e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280175e+02 ) sec^-1 MeanMatrixElemValue = ( 2.064444e-05 +- 1.952232e-05 ) GeV^-6 -TOTAL : 1.549989 sec - 1,177,853,697 cycles:u # 0.651 GHz - 2,040,892,092 instructions:u # 1.73 insn per cycle - 1.892662361 seconds time elapsed +TOTAL : 100.338493 sec + 244,269,558,690 cycles:u # 2.511 GHz + 310,349,011,676 instructions:u # 1.27 insn per cycle + 100.720988386 seconds time elapsed ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 11.6.55 (icx 20210400, clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.656440e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.657641e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.657762e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.668798e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.669974e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.670122e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856835e-04 +- 8.333436e-05 ) GeV^-6 -TOTAL : 2.057268 sec - 1,711,873,248 cycles:u # 0.736 GHz - 3,204,677,747 instructions:u # 1.87 insn per cycle - 2.389804942 seconds time elapsed +TOTAL : 2.172779 sec + 1,709,101,936 cycles:u # 0.708 GHz + 3,123,026,646 instructions:u # 1.83 insn per cycle + 2.518492608 seconds time elapsed ========================================================================= runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_f_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.665605e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.666055e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.666055e+01 ) sec^-1 -MeanMatrixElemValue = ( 2.064526e-05 +- 1.952298e-05 ) GeV^-6 -TOTAL : 3.554262 sec - 9,464,551,330 cycles:u # 2.660 GHz - 27,482,618,811 instructions:u # 2.90 insn per cycle - 3.560708184 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 6939) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.207154e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.207309e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.207309e+01 ) sec^-1 +MeanMatrixElemValue = ( 2.064562e-05 +- 1.952333e-05 ) GeV^-6 +TOTAL : 3.914611 sec + 10,084,659,249 cycles:u # 2.601 GHz + 29,156,809,500 instructions:u # 2.89 insn per cycle + 3.922099700 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:18553) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.sse4_f_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.867283e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.867924e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.867924e+02 ) sec^-1 -MeanMatrixElemValue = ( 2.064525e-05 +- 1.952297e-05 ) GeV^-6 -TOTAL : 0.954890 sec - 2,534,286,944 cycles:u # 2.645 GHz - 7,850,588,410 instructions:u # 3.10 insn per cycle - 0.961491721 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4:67544) (avx2: 0) (512y: 0) (512z: 0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 2.486067e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.486267e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.486267e+02 ) sec^-1 +MeanMatrixElemValue = ( 2.064562e-05 +- 1.952333e-05 ) GeV^-6 +TOTAL : 1.234215 sec + 2,949,890,538 cycles:u # 2.459 GHz + 7,556,763,781 instructions:u # 2.56 insn per cycle + 1.242348548 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4:288299) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.avx2_f_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.210139e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.212254e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.212254e+02 ) sec^-1 -MeanMatrixElemValue = ( 2.064831e-05 +- 1.952605e-05 ) GeV^-6 -TOTAL : 0.529340 sec - 1,193,583,813 cycles:u # 2.241 GHz - 2,725,787,167 instructions:u # 2.28 insn per cycle - 0.536323172 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:60299) (512y: 0) (512z: 0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 5.083534e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.084270e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.084270e+02 ) sec^-1 +MeanMatrixElemValue = ( 2.064543e-05 +- 1.952317e-05 ) GeV^-6 +TOTAL : 0.672094 sec + 1,253,849,680 cycles:u # 1.967 GHz + 2,326,349,562 instructions:u # 1.86 insn per cycle + 0.680043569 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:204143) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512y_f_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.655668e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.658203e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.658203e+02 ) sec^-1 -MeanMatrixElemValue = ( 2.064831e-05 +- 1.952605e-05 ) GeV^-6 -TOTAL : 0.487144 sec - 1,098,418,373 cycles:u # 2.238 GHz - 2,501,521,850 instructions:u # 2.28 insn per cycle - 0.493610804 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2:60739) (512y: 22) (512z: 0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 5.222069e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.222899e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.222899e+02 ) sec^-1 +MeanMatrixElemValue = ( 2.064543e-05 +- 1.952317e-05 ) GeV^-6 +TOTAL : 0.657276 sec + 1,219,896,250 cycles:u # 1.961 GHz + 2,079,229,311 instructions:u # 1.70 insn per cycle + 0.664920806 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2:191150) (512y: 10) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests. ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512z_f_inl0_hrd0/check.exe -p 1 256 1 OMP= -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 10.2.0] [inlineHel=0] [hardcodeCIPC=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [icx 20210400 (clang 13.0.0, gcc 10.2.0)] [inlineHel=0] [hardcodeCIPC=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+NOVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.811485e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.814350e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.814350e+02 ) sec^-1 -MeanMatrixElemValue = ( 2.064831e-05 +- 1.952605e-05 ) GeV^-6 -TOTAL : 0.475498 sec - 741,701,282 cycles:u # 1.550 GHz - 1,256,325,251 instructions:u # 1.69 insn per cycle - 0.481929398 seconds time elapsed -=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 8758) (512y: 7) (512z:52868) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=NO] +EvtsPerSec[Rmb+ME] (23) = ( 5.300174e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.301102e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.301102e+02 ) sec^-1 +MeanMatrixElemValue = ( 2.064542e-05 +- 1.952317e-05 ) GeV^-6 +TOTAL : 0.648929 sec + 846,208,557 cycles:u # 1.376 GHz + 1,579,766,487 instructions:u # 1.87 insn per cycle + 0.656608558 seconds time elapsed +=Symbols in CPPProcess.o= (~sse4: 0) (avx2: 7109) (512y: 1) (512z:186626) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2020/madgraph4gpuX/epochX/cudacpp/gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe [ PASSED ] 6 tests.