From f6d93af2eee3eb0454c6d29215913dfd255c9606 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Wed, 24 Jan 2024 16:46:46 +0100 Subject: [PATCH 01/27] Add id to track the source particle index --- PhysicsPackages/eigenPhysicsPackage_class.f90 | 2 ++ PhysicsPackages/fixedSourcePhysicsPackage_class.f90 | 1 + 2 files changed, 3 insertions(+) diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index 4ede0e8f4..66ac502d6 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -195,6 +195,8 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) ! Obtain particle current cycle dungeon call self % thisCycle % copy(neutron, n) + neutron % showerID = n + bufferLoop: do call self % geom % placeCoord(neutron % coords) diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index b1dbb4e86..2b608c7e5 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -181,6 +181,7 @@ subroutine cycles(self, tally, N_cycles) ! Obtain particle from dungeon call self % thisCycle % copy(p, n) + p % showerID = n bufferLoop: do From b1f12f81e9199303a02653709ce5ff3b3b892071 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Wed, 24 Jan 2024 19:06:00 +0100 Subject: [PATCH 02/27] Initial implementation of the dungeon sort --- ParticleObjects/particleDungeon_class.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ParticleObjects/particleDungeon_class.f90 b/ParticleObjects/particleDungeon_class.f90 index 62d6d45ba..d1af3d456 100644 --- a/ParticleObjects/particleDungeon_class.f90 +++ b/ParticleObjects/particleDungeon_class.f90 @@ -493,8 +493,8 @@ subroutine sortByBroodID(self, k) id = self % prisoners(i) % broodID if (id < 1 .or. id > k) call fatalError(Here, 'Brood ID out of range: '//numToChar(id)) - count(id) = count(id) + 1 + end do ! Convert to starting index From 04b37e13707c8ae947b9c6910c360fa1bfc23829 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Sat, 27 Jan 2024 16:49:02 +0100 Subject: [PATCH 03/27] Add range checks to the sorting of particleDungeon Determine the maximum value of the key without implicit assumptions. --- ParticleObjects/particleDungeon_class.f90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/ParticleObjects/particleDungeon_class.f90 b/ParticleObjects/particleDungeon_class.f90 index d1af3d456..89b9db5a6 100644 --- a/ParticleObjects/particleDungeon_class.f90 +++ b/ParticleObjects/particleDungeon_class.f90 @@ -491,10 +491,8 @@ subroutine sortByBroodID(self, k) count = 0 do i = 1, self % pop id = self % prisoners(i) % broodID - if (id < 1 .or. id > k) call fatalError(Here, 'Brood ID out of range: '//numToChar(id)) count(id) = count(id) + 1 - end do ! 
Convert to starting index

From 1ad1ab874bfc7d5ce350462e148e549d2ca828cf Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Sat, 17 Feb 2024 15:01:17 +0100
Subject: [PATCH 04/27] Rename showerID to broodID

---
 PhysicsPackages/eigenPhysicsPackage_class.f90       | 2 +-
 PhysicsPackages/fixedSourcePhysicsPackage_class.f90 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90
index 66ac502d6..5eedbb931 100644
--- a/PhysicsPackages/eigenPhysicsPackage_class.f90
+++ b/PhysicsPackages/eigenPhysicsPackage_class.f90
@@ -195,7 +195,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles)
 
       ! Obtain particle current cycle dungeon
       call self % thisCycle % copy(neutron, n)
-      neutron % showerID = n
+      neutron % broodID = n
 
       bufferLoop: do
         call self % geom % placeCoord(neutron % coords)
diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
index 2b608c7e5..4e7da1caf 100644
--- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
+++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
@@ -181,7 +181,7 @@ subroutine cycles(self, tally, N_cycles)
 
       ! Obtain particle from dungeon
       call self % thisCycle % copy(p, n)
-      p % showerID = n
+      p % broodID = n
 
       bufferLoop: do

From 39610a15b95d7a78a9b6cb2b3f78fd957b1a4029 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Sat, 17 Feb 2024 15:13:05 +0100
Subject: [PATCH 05/27] Apply PR comments

---
 PhysicsPackages/eigenPhysicsPackage_class.f90       | 2 --
 PhysicsPackages/fixedSourcePhysicsPackage_class.f90 | 1 -
 2 files changed, 3 deletions(-)

diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90
index 5eedbb931..4ede0e8f4 100644
--- a/PhysicsPackages/eigenPhysicsPackage_class.f90
+++ b/PhysicsPackages/eigenPhysicsPackage_class.f90
@@ -195,8 +195,6 @@ subroutine cycles(self, tally, tallyAtch, N_cycles)
 
       ! Obtain particle current cycle dungeon
       call self % thisCycle % copy(neutron, n)
-      neutron % broodID = n
-
       bufferLoop: do
         call self % geom % placeCoord(neutron % coords)
diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
index 4e7da1caf..b1dbb4e86 100644
--- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
+++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
@@ -181,7 +181,6 @@ subroutine cycles(self, tally, N_cycles)
 
       ! Obtain particle from dungeon
       call self % thisCycle % copy(p, n)
-      p % broodID = n
 
       bufferLoop: do

From 265a872ed91f863703036f6a378b543039cb6759 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Sat, 17 Feb 2024 15:27:32 +0100
Subject: [PATCH 06/27] Remove reuse of random numbers in first flights

We were reusing the first few random numbers after source generation:
the state of the RNG at the beginning of sampling a source particle was
identical to its state at the beginning of that particle's first
flight. This commit strides the RNG backwards before the source
generation is performed, which prevents the reuse.
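To make the overlap concrete, the stream position can be modelled with a
plain counter, one stride per particle. The toy program below is only a
sketch of that bookkeeping: stride_demo, p0, srcOld, srcNew and flight
are illustrative names, and it assumes (as the new call to stride(-n) in
source_inter.f90 relies on) that stride(k) advances the state by k
particle slots and accepts a negative k.

    program stride_demo
      implicit none
      integer :: p0, i, n, srcOld, srcNew, flight

      n  = 4
      p0 = 100   ! caller's stream position on entry to generate()

      do i = 1, n
        srcOld = p0 + i       ! old: pRand = rand; call pRand % stride(i)
        srcNew = p0 - n + i   ! new: local copy is rewound by n first
        flight = p0 + i       ! transport RNG of particle i (unchanged)
        print '(A,I0,A,I0,A,I0,A,I0)', 'particle ', i, ': src(old)=', &
              srcOld, ' src(new)=', srcNew, ' flight=', flight
      end do

      ! Old behaviour: src(old) == flight for every particle, so numbers
      ! drawn while sampling particle i were drawn again during its first
      ! flight. New behaviour: src(new) <= p0 < flight, so the source and
      ! transport sub-sequences are disjoint.
    end program stride_demo

Rewinding a local copy, rather than advancing the caller's generator,
keeps rand untouched (it is intent(in)); only originalRNG is moved.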
--- ParticleObjects/Source/source_inter.f90 | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ParticleObjects/Source/source_inter.f90 b/ParticleObjects/Source/source_inter.f90 index 5e3c141f3..4bf415fa5 100644 --- a/ParticleObjects/Source/source_inter.f90 +++ b/ParticleObjects/Source/source_inter.f90 @@ -100,6 +100,7 @@ subroutine generate(self, dungeon, n, rand) type(particleDungeon), intent(inout) :: dungeon integer(shortInt), intent(in) :: n class(RNG), intent(in) :: rand + type(RNG) :: originalRNG type(RNG), save :: pRand integer(shortInt) :: i !$omp threadprivate(pRand) @@ -107,12 +108,17 @@ subroutine generate(self, dungeon, n, rand) ! Set dungeon size to begin call dungeon % setSize(n) + ! Move back in the sequence to avoid reusing few first random numbers + ! in transport + originalRNG = rand + call originalRNG % stride(-n) + ! Generate n particles to populate dungeon ! TODO: advance the rand after source generation! ! This should prevent reusing RNs during transport !$omp parallel do do i = 1, n - pRand = rand + pRand = originalRNG call pRand % stride(i) call dungeon % replace(self % sampleParticle(pRand), i) end do From c328a21c60fdd7b9efce8154e024a9ab9d54218b Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Mon, 29 Jan 2024 14:52:35 +0100 Subject: [PATCH 07/27] Add MPI to the compilation No communication takes place at this stage. --- Apps/scone.f90 | 7 ++++ CMakeLists.txt | 16 +++++++++ SharedModules/CMakeLists.txt | 3 +- SharedModules/mpi_func.f90 | 67 ++++++++++++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 SharedModules/mpi_func.f90 diff --git a/Apps/scone.f90 b/Apps/scone.f90 index 44ee4c2c7..db04c818f 100644 --- a/Apps/scone.f90 +++ b/Apps/scone.f90 @@ -3,6 +3,7 @@ program scone use numPrecision use genericProcedures, only : printStart use openmp_func, only : ompSetNumThreads + use mpi_func, only : mpiInit, mpiFinalise use commandLineUI, only : getInputFile, clOptionIsPresent, addClOption, getFromCL use dictionary_class, only : dictionary use dictParser_func, only : fileToDict @@ -29,6 +30,9 @@ program scone ! Get path to input file call getInputFile(inputPath) + ! Initialize MPI + call mpiInit() + ! Set Number of threads if (clOptionIsPresent('--omp')) then call getFromCL(cores, '--omp', 1) @@ -56,6 +60,9 @@ program scone call core % run() call timerStop(timerIdx) + + call mpiFinalise() + print *, 'Total calculation time: ', trim(secToChar(timerTime(timerIdx))) print *, 'Have a good day and enjoy your result analysis!' end program scone diff --git a/CMakeLists.txt b/CMakeLists.txt index 12f844370..e6cf3e0b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,8 @@ option(LTO "Enables link-time optimisation" ON) option(COVERAGE "Collect Coverage Info" OFF) option(DEBUG "Enable extra run-time checks" OFF) option(OPENMP "Enable parallelism with OpenMP" ON) +option(MPI "Enable parallelism with MPI" ON) + # Include local cmake modules. TODO: Test on WINDOWS!!! 
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake") @@ -77,6 +79,12 @@ if(OPENMP) set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OpenMP_Fortran_FLAGS}") endif() +# Set up MPI +if(MPI) + find_package(MPI COMPONENTS Fortran REQUIRED) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DMPI") +endif() + #################################################################################################### # CHECK FOR DEPENDENCIES @@ -138,6 +146,9 @@ get_property(SRCS GLOBAL PROPERTY SRCS_LIST) add_library(scone STATIC ${SRCS}) target_compile_options(scone PRIVATE ${scone_extra_flags} ) target_link_libraries(scone ${LAPACK_LIBRARIES} ) +if(MPI) + target_link_libraries(scone MPI::MPI_Fortran) +endif() if(LTO) set_property(TARGET scone PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) @@ -147,6 +158,10 @@ endif() # COMPILE SOLVERS add_executable(scone.out ./Apps/scone.f90 ) target_link_libraries(scone.out scone ) +if(MPI) + target_link_libraries(scone.out MPI::MPI_Fortran) +endif() + #################################################################################################### # COMPILE UNIT TESTS @@ -206,6 +221,7 @@ message(STATUS " Link-time optimisation: " ${LTO}) message(STATUS " Code coverage logging: " ${COVERAGE}) message(STATUS " Extra runtime debug checks: " ${DEBUG}) message(STATUS " OpenMP parallelism: " ${OPENMP}) +message(STATUS " MPI parallelism: " ${MPI}) message(STATUS " Fortran compiler: " ${CMAKE_Fortran_COMPILER}) message(STATUS " Compiler version: " ${CMAKE_Fortran_COMPILER_VERSION}) message(STATUS " OpenMP version: " ${OpenMP_CXX_VERSION}) diff --git a/SharedModules/CMakeLists.txt b/SharedModules/CMakeLists.txt index 2a8baa4ce..552014699 100644 --- a/SharedModules/CMakeLists.txt +++ b/SharedModules/CMakeLists.txt @@ -13,7 +13,8 @@ add_sources( ./genericProcedures.f90 ./charLib_func.f90 ./openmp_func.f90 ./errors_mod.f90 - ./colours_func.f90) + ./colours_func.f90 + ./mpi_func.f90) add_unit_tests( ./Tests/grid_test.f90 ./Tests/energyGrid_test.f90 diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90 new file mode 100644 index 000000000..304f99efc --- /dev/null +++ b/SharedModules/mpi_func.f90 @@ -0,0 +1,67 @@ +module mpi_func + use numPrecision +#ifdef MPI + use mpi_f08 +#endif + implicit none + + integer(shortInt), private :: worldSize + integer(shortInt), private :: rank + +contains + + !! + !! Initialise MPI environment + !! + !! Needs to be called at the beginning of calculation before any MPI calls + !! + subroutine mpiInit() +#ifdef MPI + integer(shortInt) :: ierr + call mpi_init(ierr) + + call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) + + call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) + +#else + worldSize = 1 + rank = 0 +#endif + end subroutine mpiInit + + !! + !! Finalise MPI environment + !! + !! Needs to be called at the end of calculation after all MPI calls + !! + subroutine mpiFinalise() +#ifdef MPI + integer(shortInt) :: ierr + call MPI_Finalize(ierr) +#endif + end subroutine mpiFinalise + + !! + !! Get MPI world size + !! + !! It is the number of processes launched concurrently and communicating + !! with each other + !! + function getMPIWorldSize() result(size) + integer(shortInt) :: size + size = worldSize + end function getMPIWorldSize + + !! + !! Get MPI rank + !! + !! It is the number of the process in the MPI world. + !! Unlike in Fortran, it starts from 0 + !! 
+ function getMPIRank() result(r) + integer(shortInt) :: r + r = rank + end function getMPIRank + +end module mpi_func From fa1702def2c926f367f085d210cf02d61d12f400 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Wed, 31 Jan 2024 18:35:32 +0100 Subject: [PATCH 08/27] Explicitly reduce bins before they are closed --- SharedModules/mpi_func.f90 | 12 + .../TallyClerks/Tests/collisionClerk_test.f90 | 2 + .../Tests/collisionProbabilityClerk_test.f90 | 4 +- .../Tests/keffAnalogClerk_test.f90 | 3 +- .../Tests/keffImplicitClerk_test.f90 | 2 + Tallies/TallyClerks/Tests/mgXsClerk_test.f90 | 2 + .../Tests/shannonEntropyClerk_test.f90 | 1 + .../TallyClerks/Tests/simpleFMClerk_test.f90 | 1 + Tallies/TallyClerks/Tests/trackClerk_test.f90 | 1 + Tallies/Tests/scoreMemory_test.f90 | 600 ++++++------ Tallies/scoreMemory_class.f90 | 922 +++++++++--------- Tallies/tallyAdmin_class.f90 | 3 + 12 files changed, 803 insertions(+), 750 deletions(-) diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90 index 304f99efc..45f9301db 100644 --- a/SharedModules/mpi_func.f90 +++ b/SharedModules/mpi_func.f90 @@ -53,6 +53,18 @@ function getMPIWorldSize() result(size) size = worldSize end function getMPIWorldSize + !! + !! Return true if the process is the master process + !! + !! The master process is the one with rank 0 + !! + function isMaster() + logical(defBool) :: isMaster + + isMaster = (rank == 0) + + end function isMaster + !! !! Get MPI rank !! diff --git a/Tallies/TallyClerks/Tests/collisionClerk_test.f90 b/Tallies/TallyClerks/Tests/collisionClerk_test.f90 index bb4e67f39..97f13a6a2 100644 --- a/Tallies/TallyClerks/Tests/collisionClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/collisionClerk_test.f90 @@ -219,6 +219,7 @@ subroutine testScoring(this) p % w = 1000.3_defReal call clerk % reportInColl(p, nucData, mem, .true.) + call mem % reduceBins() call mem % closeCycle(ONE) ! Verify results of scoring @@ -338,6 +339,7 @@ subroutine testScoringVirtual(this) p % w = 1.3_defReal call clerk % reportInColl(p, nucData, mem, .false.) + call mem % reduceBins() call mem % closeCycle(ONE) ! Verify results of scoring diff --git a/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 b/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 index 79a3b5dae..dbd832b3d 100644 --- a/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 @@ -131,9 +131,11 @@ subroutine testSimpleUseCase(this) p % w = 0.9 p % preCollision % matIdx = 88 call this % clerk % reportInColl(p, xsData, mem, .false.) - call this % clerk % reportCycleEnd(pop, mem) + ! Close cycle + call mem % reduceBins() + call this % clerk % reportCycleEnd(pop, mem) call mem % closeCycle(ONE) ! Verify results diff --git a/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 b/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 index 9eed16980..c98afff1c 100644 --- a/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 @@ -94,7 +94,8 @@ subroutine test1CycleBatch(this) call pit % detain(p) pit % k_eff = 1.2_defReal - call this % clerk % reportCycleEnd(pit,mem) + call mem % reduceBins() + call this % clerk % reportCycleEnd(pit, mem) call mem % closeCycle(0.8_defReal) ! 
Validate results diff --git a/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 b/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 index 16563faa2..0b518f897 100644 --- a/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 @@ -94,6 +94,7 @@ subroutine test1CycleBatch(this) call this % clerk % reportHist(p, this % nucData, mem) ! End cycle + call mem % reduceBins() call pit % detain(p) call this % clerk % reportCycleEnd(pit, mem) call pit % release(p) @@ -113,6 +114,7 @@ subroutine test1CycleBatch(this) call this % clerk % reportHist(p, this % nucData, mem) ! End cycle + call mem % reduceBins() call pit % detain(p) call this % clerk % reportCycleEnd(pit, mem) call pit % release(p) diff --git a/Tallies/TallyClerks/Tests/mgXsClerk_test.f90 b/Tallies/TallyClerks/Tests/mgXsClerk_test.f90 index ab47d32a5..d14e284c4 100644 --- a/Tallies/TallyClerks/Tests/mgXsClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/mgXsClerk_test.f90 @@ -139,6 +139,7 @@ subroutine testScoring_clerk1(this) p % E = 0.1_defReal call this % clerk_test1 % reportOutColl(p, N_2N, 0.75_defReal, this % nucData, mem) + call mem % reduceBins() call mem % closeCycle(ONE) ! Process and get results @@ -216,6 +217,7 @@ subroutine testScoring_clerk2(this) p % E = 1.1_defReal call this % clerk_test2 % reportOutColl(p, N_2N, 0.75_defReal, this % nucData, mem) + call mem % reduceBins() call mem % closeCycle(ONE) ! Process and get results diff --git a/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 b/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 index eae641eab..6a8fea657 100644 --- a/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 @@ -110,6 +110,7 @@ subroutine testSimpleUseCase(this) call this % clerk % reportCycleEnd(pop, mem) ! Close cycle + call mem % reduceBins() call mem % closeCycle(ONE) ! Verify results for all particles in one bine diff --git a/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 b/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 index a2a408a15..e28358dde 100644 --- a/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 @@ -126,6 +126,7 @@ subroutine testSimpleUseCase(this) call this % clerk % reportCycleEnd(pop, mem) ! Close cycle + call mem % reduceBins() call mem % closeCycle(ONE) ! Verify results diff --git a/Tallies/TallyClerks/Tests/trackClerk_test.f90 b/Tallies/TallyClerks/Tests/trackClerk_test.f90 index ce2d651c5..82ece4302 100644 --- a/Tallies/TallyClerks/Tests/trackClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/trackClerk_test.f90 @@ -213,6 +213,7 @@ subroutine testScoring(this) p % w = 1.3_defReal call clerk % reportPath(p, L, nucData, mem) + call mem % reduceBins() call mem % closeCycle(ONE) ! 
Verify results of scoring diff --git a/Tallies/Tests/scoreMemory_test.f90 b/Tallies/Tests/scoreMemory_test.f90 index 927c64a80..0c16a437d 100644 --- a/Tallies/Tests/scoreMemory_test.f90 +++ b/Tallies/Tests/scoreMemory_test.f90 @@ -1,300 +1,302 @@ -module scoreMemory_test - use numPrecision - use genericProcedures, only : numToChar - use scoreMemory_class, only : scoreMemory - use funit - - implicit none - -@testParameter(constructor = new_testNumber) - type, extends(AbstractTestParameter) :: testNumber - integer(shortInt) :: i - contains - procedure :: toString - end type testNumber - -@testCase(constructor=newTest) - type, extends(ParameterizedTestCase) :: test_scoreMemory - private - integer(longInt) :: Ncycles - integer(shortInt) :: batchSize - real(defReal),dimension(:), allocatable :: scores - integer(shortInt), dimension(:),allocatable :: scoresInt - - end type test_scoreMemory - - -contains - - !! - !! Build new test parameter form integer - !! - function new_testNumber(i) result (tstNum) - integer(shortInt) :: i - type(testNumber) :: tstNum - - tstNum % i = i - - end function new_testNumber - - !! - !! Write test parameter to string - !! - function toString(this) result(string) - class(testNumber), intent(in) :: this - character(:), allocatable :: string - character(nameLen) :: str - - write (str,*) this % i - string = str - - end function toString - - !! - !! Construct test case - !! - !! - !! - function newTest(testParam) result(tst) - type(testNumber), intent(in) :: testParam - type(test_scoreMemory) :: tst - real(defReal),dimension(200) :: random - integer(shortInt) :: seed, i - integer(shortInt),parameter :: A = 2469 ! Multiplier of LC PRNG - integer(shortInt),parameter :: M = 65521 ! Modulus of PRNG - - ! Load batchSize - tst % batchSize = testParam % i - tst % Ncycles = 10 * tst % batchSize - - ! Generate a vector of 20 pseudo-random numbers in <0;1> - ! Generator is not sophisticated but robust - seed = 9294 - do i=1,200 - seed = mod(A * seed , M) - random(i) = seed / real(M,defReal) - end do - - ! Generate some scores and calculate their sum and sum of squares - tst % scores = TWO + sin(PI * random - PI/2) - tst % scoresInt = int(random * 100, shortInt) - - end function newTest - -!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> -!! PROPER TESTS BEGIN HERE -!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - !! - !! Test acoring for a case with batchSize == 1 - !! Look at the end of the file to find MATLAB SCRIPT used to generate reference values - !! -@Test(cases=[1]) - subroutine testScoring(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - integer(shortInt) :: i, j - real(defReal) :: res1, res2, STD - real(defReal), parameter :: TOL = 1.0E-9 - - ! Initialise score memory - call mem % init(7_longInt, 1, batchSize = this % batchSize) - - ! Test getting batchSize - @assertEqual(this % batchSize, mem % getBatchSize(),'Test getBatchSize() :') - - ! Score in - do i=1,10 - ! Score - do j=20*(i-1)+1,20 * i - call mem % score(this % scores(j), 1_longInt) - call mem % score(this % scoresInt(j), 2_longInt) - call mem % score(int(this % scoresInt(j),longInt),3_longInt) - call mem % accumulate(this % scores(j), 4_longInt) - call mem % accumulate(this % scoresInt(j), 5_longInt) - call mem % accumulate(int(this % scoresInt(j),longInt),6_longInt) - - end do - ! 
Close a single bin with diffrent normalisation - call mem % closeBin(1.2_defReal, 3_longInt) - - ! Close Cycle - call mem % closeCycle(0.7_defReal) - - end do - - ! Get results from bin 1 - call mem % getResult(res1, 1_longInt) - call mem % getResult(res2, STD, 1_longInt) - - @assertEqual(26.401471259728442_defReal, res1, TOL) - @assertEqual(26.401471259728442_defReal, res2, TOL) - @assertEqual(0.645969443981583_defReal, STD, TOL) - - ! Get results from bin 2 - call mem % getResult(res1, 2_longInt) - call mem % getResult(res2, STD, 2_longInt) - - @assertEqual(623.0_defReal, res1, TOL) - @assertEqual(623.0_defReal, res2, TOL) - @assertEqual(27.982494527829360_defReal, STD, TOL) - - ! Get results from bin 3 - call mem % getResult(res1, 3_longInt) - call mem % getResult(res2, STD, 3_longInt) - - @assertEqual(1068.0_defReal, res1, TOL) - @assertEqual(1068.0_defReal, res2, TOL) - @assertEqual(47.969990619136050_defReal, STD, TOL) - - ! Get results from bin 4 - call mem % getResult(res1, 4_longInt, 200) - call mem % getResult(res2, STD, 4_longInt, 200) - - @assertEqual(1.885819375694888_defReal, res1, TOL) - @assertEqual(1.885819375694888_defReal, res2, TOL) - @assertEqual(0.049102082638055_defReal, STD, TOL) - - ! Get results from bin 5 - call mem % getResult(res1, 5_longInt, 200) - call mem % getResult(res2, STD, 5_longInt, 200) - - @assertEqual(44.500000000000000_defReal, res1, TOL) - @assertEqual(44.500000000000000_defReal, res2, TOL) - @assertEqual(2.015580019267494_defReal, STD, TOL) - - ! Get results from bin 6 - call mem % getResult(res1, 6_longInt, 200) - call mem % getResult(res2, STD, 6_longInt, 200) - - @assertEqual(44.500000000000000_defReal, res1, TOL) - @assertEqual(44.500000000000000_defReal, res2, TOL) - @assertEqual(2.015580019267494_defReal, STD, TOL) - - ! Get results from an empty bin 7 - call mem % getResult(res1, 7_longInt) - call mem % getResult(res2, STD, 7_longInt) - - @assertEqual(ZERO, res1, TOL) - @assertEqual(ZERO, res2, TOL) - @assertEqual(ZERO, STD, TOL) - - ! Get results from invalid bins - call mem % getResult(res1, -7_longInt) - call mem % getResult(res2, STD, -7_longInt) - - @assertEqual(ZERO, res1, TOL) - @assertEqual(ZERO, res2, TOL) - @assertEqual(ZERO, STD, TOL) - - call mem % getResult(res1, 8_longInt) - call mem % getResult(res2, STD, 8_longInt) - - @assertEqual(ZERO, res1, TOL) - @assertEqual(ZERO, res2, TOL) - @assertEqual(ZERO, STD, TOL) - - ! Free memor y - call mem % kill() - - end subroutine testScoring - - !! - !! Test lastCycle - !! Ignors test parametrisation - !! -@Test(cases=[1]) - subroutine testLastCycle(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - integer(shortInt) :: i - - call mem % init(1_longInt, 1, batchSize = 8) - - ! Test getting batchSize - @assertEqual(8, mem % getBatchSize(),'Test getBatchSize() :') - - do i=1,16 - if(i == 8 .or. i == 16) then - @assertTrue( mem % lastCycle(), 'In cycle num: '//numToChar(i)) - else - @assertFalse( mem % lastCycle(), 'In cycle num: '//numToChar(i)) - end if - call mem % closeCycle(ONE) - end do - - call mem % kill() - - end subroutine testLastCycle - - !! - !! Test get score - !! Ignore test parametrisation - !! 
-@Test(cases=[1]) - subroutine testGetScore(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - real(defReal),parameter :: TOL = 1.0E-9 - - call mem % init(1_longInt, 1) - - call mem % score(ONE,1_longInt) - call mem % score(ONE,1_longInt) - call mem % score(ONE,1_longInt) - - @assertEqual(3*ONE, mem % getScore(1_longInt), TOL, 'Test getScore, valid bin:') - @assertEqual(ZERO, mem % getScore(0_longInt), TOL, 'Test getScore, not +ve bin:') - @assertEqual(ZERO, mem % getScore(2_longInt), TOL, 'Test getScore, too large bin:') - - end subroutine testGetScore - - !! - !! Test killing uninitialised scoreMemory - !! -@Test(cases=[1]) - subroutine testKillUnalloc(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - - call mem % kill() - - end subroutine testKillUnalloc - -end module scoreMemory_test -!! MATLAB SCRIPT USED TO GENERATE REFERENCE VALUES -!clear -!rand = zeros(20,1); -!seed = 9294; -! -!%LCG Params -!A = 2469; -!M = 65521; -! -!for i=1:1:200 -! seed = mod(A * seed, M); -! rand(i) = seed/M; -!end -! -!% Calculate scores vector -!scores = 2.0 + sin(pi() .* rand - pi()/2); -!scoresInt = floor(100.*rand); -! -!% Accumulate results -!resAcc = mean(scores) -!stdAcc = sqrt(var(scores)./200) -! -!resAccInt = mean(scoresInt) -!stdAccInt = sqrt(var(scoresInt)./200) -! -!% Reshape scores -!scores = reshape(scores,[20,10]); -!scores = sum(scores,1)* 0.7; -!res = mean(scores) -!std = sqrt(var(scores)./10) -! -!% Reshape scores -!scoresInt = reshape(scoresInt,[20,10]); -!scoresInt = sum(scoresInt,1)* 0.7; -!resInt = mean(scoresInt) +module scoreMemory_test + use numPrecision + use genericProcedures, only : numToChar + use scoreMemory_class, only : scoreMemory + use pFUnit_mod + + implicit none + +@testParameter(constructor = new_testNumber) + type, extends(AbstractTestParameter) :: testNumber + integer(shortInt) :: i + contains + procedure :: toString + end type testNumber + +@testCase(constructor=newTest) + type, extends(ParameterizedTestCase) :: test_scoreMemory + private + integer(longInt) :: Ncycles + integer(shortInt) :: batchSize + real(defReal),dimension(:), allocatable :: scores + integer(shortInt), dimension(:),allocatable :: scoresInt + + end type test_scoreMemory + + +contains + + !! + !! Build new test parameter form integer + !! + function new_testNumber(i) result (tstNum) + integer(shortInt) :: i + type(testNumber) :: tstNum + + tstNum % i = i + + end function new_testNumber + + !! + !! Write test parameter to string + !! + function toString(this) result(string) + class(testNumber), intent(in) :: this + character(:), allocatable :: string + character(nameLen) :: str + + write (str,*) this % i + string = str + + end function toString + + !! + !! Construct test case + !! + !! + !! + function newTest(testParam) result(tst) + type(testNumber), intent(in) :: testParam + type(test_scoreMemory) :: tst + real(defReal),dimension(200) :: random + integer(shortInt) :: seed, i + integer(shortInt),parameter :: A = 2469 ! Multiplier of LC PRNG + integer(shortInt),parameter :: M = 65521 ! Modulus of PRNG + + ! Load batchSize + tst % batchSize = testParam % i + tst % Ncycles = 10 * tst % batchSize + + ! Generate a vector of 20 pseudo-random numbers in <0;1> + ! Generator is not sophisticated but robust + seed = 9294 + do i=1,200 + seed = mod(A * seed , M) + random(i) = seed / real(M,defReal) + end do + + ! 
Generate some scores and calculate their sum and sum of squares
+    tst % scores = TWO + sin(PI * random - PI/2)
+    tst % scoresInt = int(random * 100, shortInt)
+
+  end function newTest
+
+!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
+!! PROPER TESTS BEGIN HERE
+!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
+
+  !!
+  !! Test scoring for a case with batchSize == 1
+  !! Look at the end of the file to find MATLAB SCRIPT used to generate reference values
+  !!
+@Test(cases=[1])
+  subroutine testScoring(this)
+    class(test_scoreMemory), intent(inout) :: this
+    type(scoreMemory)                      :: mem
+    integer(shortInt)                      :: i, j
+    real(defReal)                          :: res1, res2, STD
+    real(defReal), parameter               :: TOL = 1.0E-9
+
+    ! Initialise score memory
+    call mem % init(7_longInt, 1, batchSize = this % batchSize)
+
+    ! Test getting batchSize
+    @assertEqual(this % batchSize, mem % getBatchSize(),'Test getBatchSize() :')
+
+    ! Score in
+    do i=1,10
+      ! Score
+      do j=20*(i-1)+1,20 * i
+        call mem % score(this % scores(j), 1_longInt)
+        call mem % score(this % scoresInt(j), 2_longInt)
+        call mem % score(int(this % scoresInt(j),longInt),3_longInt)
+        call mem % accumulate(this % scores(j), 4_longInt)
+        call mem % accumulate(this % scoresInt(j), 5_longInt)
+        call mem % accumulate(int(this % scoresInt(j),longInt),6_longInt)
+
+      end do
+      call mem % reduceBins()
+      ! Close a single bin with different normalisation
+      call mem % closeBin(1.2_defReal, 3_longInt)
+
+      ! Close Cycle
+      call mem % closeCycle(0.7_defReal)
+
+    end do
+
+    ! Get results from bin 1
+    call mem % getResult(res1, 1_longInt)
+    call mem % getResult(res2, STD, 1_longInt)
+
+    @assertEqual(26.401471259728442_defReal, res1, TOL)
+    @assertEqual(26.401471259728442_defReal, res2, TOL)
+    @assertEqual(0.645969443981583_defReal, STD, TOL)
+
+    ! Get results from bin 2
+    call mem % getResult(res1, 2_longInt)
+    call mem % getResult(res2, STD, 2_longInt)
+
+    @assertEqual(623.0_defReal, res1, TOL)
+    @assertEqual(623.0_defReal, res2, TOL)
+    @assertEqual(27.982494527829360_defReal, STD, TOL)
+
+    ! Get results from bin 3
+    call mem % getResult(res1, 3_longInt)
+    call mem % getResult(res2, STD, 3_longInt)
+
+    @assertEqual(1068.0_defReal, res1, TOL)
+    @assertEqual(1068.0_defReal, res2, TOL)
+    @assertEqual(47.969990619136050_defReal, STD, TOL)
+
+    ! Get results from bin 4
+    call mem % getResult(res1, 4_longInt, 200)
+    call mem % getResult(res2, STD, 4_longInt, 200)
+
+    @assertEqual(1.885819375694888_defReal, res1, TOL)
+    @assertEqual(1.885819375694888_defReal, res2, TOL)
+    @assertEqual(0.049102082638055_defReal, STD, TOL)
+
+    ! Get results from bin 5
+    call mem % getResult(res1, 5_longInt, 200)
+    call mem % getResult(res2, STD, 5_longInt, 200)
+
+    @assertEqual(44.500000000000000_defReal, res1, TOL)
+    @assertEqual(44.500000000000000_defReal, res2, TOL)
+    @assertEqual(2.015580019267494_defReal, STD, TOL)
+
+    ! Get results from bin 6
+    call mem % getResult(res1, 6_longInt, 200)
+    call mem % getResult(res2, STD, 6_longInt, 200)
+
+    @assertEqual(44.500000000000000_defReal, res1, TOL)
+    @assertEqual(44.500000000000000_defReal, res2, TOL)
+    @assertEqual(2.015580019267494_defReal, STD, TOL)
+
+    ! Get results from an empty bin 7
+    call mem % getResult(res1, 7_longInt)
+    call mem % getResult(res2, STD, 7_longInt)
+
+    @assertEqual(ZERO, res1, TOL)
+    @assertEqual(ZERO, res2, TOL)
+    @assertEqual(ZERO, STD, TOL)
+
+    ! 
Get results from invalid bins + call mem % getResult(res1, -7_longInt) + call mem % getResult(res2, STD, -7_longInt) + + @assertEqual(ZERO, res1, TOL) + @assertEqual(ZERO, res2, TOL) + @assertEqual(ZERO, STD, TOL) + + call mem % getResult(res1, 8_longInt) + call mem % getResult(res2, STD, 8_longInt) + + @assertEqual(ZERO, res1, TOL) + @assertEqual(ZERO, res2, TOL) + @assertEqual(ZERO, STD, TOL) + + ! Free memory + call mem % kill() + + end subroutine testScoring + + !! + !! Test lastCycle + !! Ignors test parametrisation + !! +@Test(cases=[1]) + subroutine testLastCycle(this) + class(test_scoreMemory), intent(inout) :: this + type(scoreMemory) :: mem + integer(shortInt) :: i + + call mem % init(1_longInt, 1, batchSize = 8) + + ! Test getting batchSize + @assertEqual(8, mem % getBatchSize(),'Test getBatchSize() :') + + do i=1,16 + if(i == 8 .or. i == 16) then + @assertTrue( mem % lastCycle(), 'In cycle num: '//numToChar(i)) + else + @assertFalse( mem % lastCycle(), 'In cycle num: '//numToChar(i)) + end if + call mem % closeCycle(ONE) + end do + + call mem % kill() + + end subroutine testLastCycle + + !! + !! Test get score + !! Ignore test parametrisation + !! +@Test(cases=[1]) + subroutine testGetScore(this) + class(test_scoreMemory), intent(inout) :: this + type(scoreMemory) :: mem + real(defReal),parameter :: TOL = 1.0E-9 + + call mem % init(1_longInt, 1) + + call mem % score(ONE,1_longInt) + call mem % score(ONE,1_longInt) + call mem % score(ONE,1_longInt) + call mem % reduceBins() + + @assertEqual(3*ONE, mem % getScore(1_longInt), TOL, 'Test getScore, valid bin:') + @assertEqual(ZERO, mem % getScore(0_longInt), TOL, 'Test getScore, not +ve bin:') + @assertEqual(ZERO, mem % getScore(2_longInt), TOL, 'Test getScore, too large bin:') + + end subroutine testGetScore + + !! + !! Test killing uninitialised scoreMemory + !! +@Test(cases=[1]) + subroutine testKillUnalloc(this) + class(test_scoreMemory), intent(inout) :: this + type(scoreMemory) :: mem + + call mem % kill() + + end subroutine testKillUnalloc + +end module scoreMemory_test +!! MATLAB SCRIPT USED TO GENERATE REFERENCE VALUES +!clear +!rand = zeros(20,1); +!seed = 9294; +! +!%LCG Params +!A = 2469; +!M = 65521; +! +!for i=1:1:200 +! seed = mod(A * seed, M); +! rand(i) = seed/M; +!end +! +!% Calculate scores vector +!scores = 2.0 + sin(pi() .* rand - pi()/2); +!scoresInt = floor(100.*rand); +! +!% Accumulate results +!resAcc = mean(scores) +!stdAcc = sqrt(var(scores)./200) +! +!resAccInt = mean(scoresInt) +!stdAccInt = sqrt(var(scoresInt)./200) +! +!% Reshape scores +!scores = reshape(scores,[20,10]); +!scores = sum(scores,1)* 0.7; +!res = mean(scores) +!std = sqrt(var(scores)./10) +! +!% Reshape scores +!scoresInt = reshape(scoresInt,[20,10]); +!scoresInt = sum(scoresInt,1)* 0.7; +!resInt = mean(scoresInt) !stdInt = sqrt(var(scoresInt)./10) diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index 16d73d255..bec599de6 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -1,449 +1,473 @@ -module scoreMemory_class - - use numPrecision - use universalVariables, only : array_pad - use genericProcedures, only : fatalError, numToChar - use openmp_func, only : ompGetMaxThreads, ompGetThreadNum - - implicit none - private - - !! Parameters for indexes of per cycle SCORE, Cumulative Sum and Cumulative Sum of squares - integer(shortInt), parameter :: CSUM = 1, & - CSUM2 = 2 - - !! Size of the 2nd Dimension of bins - integer(shortInt), parameter :: DIM2 = 2 - - - !! - !! 
scoreMemory is a class that stores space for scores from tallies. - !! It is separate from tallyClerks and individual responses to allow: - !! -> Easy writing and (later) reading from file for archivisation of results - !! -> Easy possibility of extention to tally higher moments of result - !! -> Possibility of extension to tally covariance of selected tally bins - !! -> Easy copying and recombination of results for OpenMP shared memory parallelism - !! -> Easy, output format-independent way to perform regression tests - !! -> Easy handling of different batch sizes - !! - !! For every bin index there are two positions: CSUM, CSUM2. All are initialised to 0. - !! For scoring, an array is created with dimension (Nbins,nThreads) to mitigate false sharing. - !! On accumulation, this array adds to the normal bin array. - !! - !! Interface: - !! init(N,idBS): Initialise with integer size N and integer id. Optional integer Batch Size. - !! - !! kill(): Elemental. Return to uninitialised state. - !! - !! score(score,idx): Score in the bin under idx. FatalError if idx is outside bounds. Score - !! is defReal, shortInt or longInt - !! - !! accumulate(score,idx): Accumulate result in cumulative sums in bin under idx. FatalError - !! if idx is outside bounds. Score is defReal, shortInt or longInt. - !! - !! getResult(mean, STD, idx, samples): Retrieve mean value and standard deviation of the - !! estimate under idx. Use optional samples to specify number of estimates used to - !! evaluate mean and STD from default, which is number of batches in score memory. - !! STD is optional. - !! - !! getScore(idx): Return current value of score from bin under idx. FatalError if idx is - !! outside bounds. - !! - !! closeBin(normFactor,idx): Multiplies score under bin by normFactor and accumulates it in - !! cumulative sums. Then sets the bin to zero. - !! - !! closeCycle(normFactor): Multiplies all scores by normFactor and accumulates them in - !! cumulative sums. Sets all scors to zero. - !! - !! lastCycle(): Return true if the next call to closeCycle will close a batch. - !! - !! getBatchSize(): Returns number of cycles that constitute a single batch. - !! - !! Example use case: - !! - !! do batches=1,20 - !! do hist=1,10 - !! call scoreMem % score(hist,1) ! Score hist (1,10) in bin 1 - !! call scoreMem % accumulate(hist,2) ! Accumulate hist in CSUMs of bin 2 - !! end do - !! call scoreMem % closeCycle(ONE) ! Close batch without normalisation (factor = ONE) - !! end do - !! - !! call scoreMem % getResult(mean,STD,1) ! Get result from bin 1 with STD - !! call scoreMem % getResult(mean,2,200) ! Get mean from bin 2 assuming 200 samples - !! - !! NOTE: Following indexing is used in bins class member - !! bins(binIndex,binType) binType is CSUM/CSUM2 - !! NOTE2: If batch size is not a denominator of cycles scored results accumulated - !! in extra cycles are discarded in current implementation - !! - type, public :: scoreMemory - !private - real(defReal),dimension(:,:),allocatable :: bins !! Space for storing cumul data (2nd dim size is always 2!) - real(defReal),dimension(:,:),allocatable :: parallelBins !! Space for scoring for different threads - integer(longInt) :: N = 0 !! Size of memory (number of bins) - integer(shortInt) :: nThreads = 0 !! Number of threads used for parallelBins - integer(shortInt) :: id !! Id of the tally - integer(shortInt) :: batchN = 0 !! Number of Batches - integer(shortInt) :: cycles = 0 !! Cycles counter - integer(shortInt) :: batchSize = 1 !! 
Batch interval size (in cycles) - contains - ! Interface procedures - procedure :: init - procedure :: kill - generic :: score => score_defReal, score_shortInt, score_longInt - generic :: accumulate => accumulate_defReal, accumulate_shortInt, accumulate_longInt - generic :: getResult => getResult_withSTD, getResult_withoutSTD - procedure :: getScore - procedure :: closeCycle - procedure :: closeBin - procedure :: lastCycle - procedure :: getBatchSize - - ! Private procedures - procedure, private :: score_defReal - procedure, private :: score_shortInt - procedure, private :: score_longInt - procedure, private :: accumulate_defReal - procedure, private :: accumulate_shortInt - procedure, private :: accumulate_longInt - procedure, private :: getResult_withSTD - procedure, private :: getResult_withoutSTD - - end type scoreMemory - -contains - - !! - !! Allocate space for the bins given number of bins N - !! Optionaly change batchSize from 1 to any +ve number - !! - subroutine init(self, N, id, batchSize ) - class(scoreMemory),intent(inout) :: self - integer(longInt),intent(in) :: N - integer(shortInt),intent(in) :: id - integer(shortInt),optional,intent(in) :: batchSize - character(100), parameter :: Here= 'init (scoreMemory_class.f90)' - - ! Allocate space and zero all bins - allocate( self % bins(N, DIM2)) - self % bins = ZERO - - self % nThreads = ompGetMaxThreads() - - ! Note the array padding to avoid false sharing - allocate( self % parallelBins(N + array_pad, self % nThreads)) - self % parallelBins = ZERO - - ! Save size of memory - self % N = N - - ! Assign memory id - self % id = id - - ! Set batchN, cycles and batchSize to default values - self % batchN = 0 - self % cycles = 0 - self % batchSize = 1 - - if(present(batchSize)) then - if(batchSize > 0) then - self % batchSize = batchSize - else - call fatalError(Here,'Batch Size of: '// numToChar(batchSize) //' is invalid') - end if - end if - - end subroutine init - - !! - !! Deallocate memory and return to uninitialised state - !! - subroutine kill(self) - class(scoreMemory), intent(inout) :: self - - if(allocated(self % bins)) deallocate(self % bins) - if(allocated(self % parallelBins)) deallocate(self % parallelBins) - self % N = 0 - self % nThreads = 0 - self % batchN = 0 - - end subroutine kill - - !! - !! Score a result on a given single bin under idx - !! - subroutine score_defReal(self, score, idx) - class(scoreMemory), intent(inout) :: self - real(defReal), intent(in) :: score - integer(longInt), intent(in) :: idx - integer(shortInt) :: thread_idx - character(100),parameter :: Here = 'score_defReal (scoreMemory_class.f90)' - - ! Verify bounds for the index - if( idx < 0_longInt .or. idx > self % N) then - call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & - & memory with size '//numToChar(self % N)) - end if - - ! Add the score - thread_idx = ompGetThreadNum() + 1 - self % parallelBins(idx, thread_idx) = & - self % parallelBins(idx, thread_idx) + score - - end subroutine score_defReal - - !! - !! Score a result with shortInt on a given bin under idx - !! - subroutine score_shortInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(shortInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % score_defReal(real(score, defReal), idx) - - end subroutine score_shortInt - - !! - !! Score a result with longInt on a given bin under idx - !! 
- subroutine score_longInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(longInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % score_defReal(real(score, defReal), idx) - - end subroutine score_longInt - - !! - !! Increment the result directly on cumulative sums - !! - subroutine accumulate_defReal(self, score, idx) - class(scoreMemory), intent(inout) :: self - real(defReal), intent(in) :: score - integer(longInt), intent(in) :: idx - character(100),parameter :: Here = 'accumulate_defReal (scoreMemory_class.f90)' - - ! Verify bounds for the index - if( idx < 0_longInt .or. idx > self % N) then - call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & - & memory with size '//numToChar(self % N)) - end if - - ! Add the score - self % bins(idx, CSUM) = self % bins(idx, CSUM) + score - self % bins(idx, CSUM2) = self % bins(idx, CSUM2) + score * score - - end subroutine accumulate_defReal - - !! - !! Increment the result directly on cumulative sums with shortInt score - !! - subroutine accumulate_shortInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(shortInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % accumulate_defReal(real(score, defReal), idx) - - end subroutine accumulate_shortInt - - !! - !! Increment the result directly on cumulative sums with longInt score - !! - subroutine accumulate_longInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(longInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % accumulate_defReal(real(score, defReal), idx) - - end subroutine accumulate_longInt - - !! - !! Close Cycle - !! Increments cycle counter and detects end-of-batch - !! When batch finishes it normalises all scores by the factor and moves them to CSUMs - !! - subroutine closeCycle(self, normFactor) - class(scoreMemory), intent(inout) :: self - real(defReal),intent(in) :: normFactor - integer(longInt) :: i - real(defReal), save :: res - !$omp threadprivate(res) - - ! Increment Cycle Counter - self % cycles = self % cycles + 1 - - if(mod(self % cycles, self % batchSize) == 0) then ! Close Batch - - !$omp parallel do - do i = 1, self % N - - ! Normalise scores - self % parallelBins(i,:) = self % parallelBins(i,:) * normFactor - res = sum(self % parallelBins(i,:)) - - ! Zero all score bins - self % parallelBins(i,:) = ZERO - - ! Increment cumulative sums - self % bins(i,CSUM) = self % bins(i,CSUM) + res - self % bins(i,CSUM2) = self % bins(i,CSUM2) + res * res - - end do - !$omp end parallel do - - ! Increment batch counter - self % batchN = self % batchN + 1 - - end if - - end subroutine closeCycle - - !! - !! Close Cycle - !! Multiplies score in bin under idx by normFactor, accumulates it and sets it to zero - !! - subroutine closeBin(self, normFactor, idx) - class(scoreMemory), intent(inout) :: self - real(defReal),intent(in) :: normFactor - integer(longInt), intent(in) :: idx - real(defReal) :: res - character(100),parameter :: Here = 'closeBin (scoreMemory_class.f90)' - - ! Verify bounds for the index - if( idx < 0_longInt .or. idx > self % N) then - call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & - & memory with size '//numToChar(self % N)) - end if - - ! Normalise score - self % parallelBins(idx, :) = self % parallelBins(idx, :) * normFactor - - ! 
Increment cumulative sum - res = sum(self % parallelBins(idx,:)) - self % bins(idx,CSUM) = self % bins(idx,CSUM) + res - self % bins(idx,CSUM2) = self % bins(idx,CSUM2) + res * res - - ! Zero the score - self % parallelBins(idx,:) = ZERO - - end subroutine closeBin - - - !! - !! Return true if next closeCycle will close a batch - !! - function lastCycle(self) result(isIt) - class(scoreMemory), intent(in) :: self - logical(defBool) :: isIt - - isIt = mod(self % cycles + 1, self % batchSize) == 0 - - end function lastCycle - - !! - !! Return batchSize - !! - pure function getBatchSize(self) result(S) - class(scoreMemory), intent(in) :: self - integer(shortInt) :: S - - S = self % batchSize - - end function getBatchSize - - !! - !! Load mean result and Standard deviation into provided arguments - !! Load from bin indicated by idx - !! Returns 0 if index is invalid - !! - elemental subroutine getResult_withSTD(self, mean, STD, idx, samples) - class(scoreMemory), intent(in) :: self - real(defReal), intent(out) :: mean - real(defReal),intent(out) :: STD - integer(longInt), intent(in) :: idx - integer(shortInt), intent(in),optional :: samples - integer(shortInt) :: N - real(defReal) :: inv_N, inv_Nm1 - - !! Verify index. Return 0 if not present - if( idx < 0_longInt .or. idx > self % N) then - mean = ZERO - STD = ZERO - return - end if - - ! Check if # of samples is provided - if( present(samples)) then - N = samples - else - N = self % batchN - end if - - ! Calculate mean - mean = self % bins(idx, CSUM) / N - - ! Calculate STD - inv_N = ONE / N - if( N /= 1) then - inv_Nm1 = ONE / (N - 1) - else - inv_Nm1 = ONE - end if - STD = self % bins(idx, CSUM2) *inv_N * inv_Nm1 - mean * mean * inv_Nm1 - STD = sqrt(STD) - - end subroutine getResult_withSTD - - !! - !! Load mean result provided argument - !! Load from bin indicated by idx - !! Returns 0 if index is invalid - !! - elemental subroutine getResult_withoutSTD(self, mean, idx, samples) - class(scoreMemory), intent(in) :: self - real(defReal), intent(out) :: mean - integer(longInt), intent(in) :: idx - integer(shortInt), intent(in),optional :: samples - integer(shortInt) :: N - - !! Verify index. Return 0 if not present - if( idx < 0_longInt .or. idx > self % N) then - mean = ZERO - return - end if - - ! Check if # of samples is provided - if( present(samples)) then - N = samples - else - N = self % batchN - end if - - ! Calculate mean - mean = self % bins(idx, CSUM) / N - - end subroutine getResult_withoutSTD - - !! - !! Obtain value of a score in a bin - !! Return ZERO for invalid bin address (idx) - !! - elemental function getScore(self, idx) result (score) - class(scoreMemory), intent(in) :: self - integer(longInt), intent(in) :: idx - real(defReal) :: score - - if(idx <= 0_longInt .or. idx > self % N) then - score = ZERO - else - score = sum(self % parallelBins(idx, :)) - end if - - end function getScore - -end module scoreMemory_class +module scoreMemory_class + + use numPrecision + use universalVariables, only : array_pad + use genericProcedures, only : fatalError, numToChar + use openmp_func, only : ompGetMaxThreads, ompGetThreadNum + + implicit none + private + + !! Parameters for indexes of per cycle SCORE, Cumulative Sum and Cumulative Sum of squares + integer(shortInt), parameter :: BIN = 1, & + CSUM = 2, & + CSUM2 = 3 + + !! Size of the 2nd Dimension of bins + integer(shortInt), parameter :: DIM2 = 3 + + + !! + !! scoreMemory is a class that stores space for scores from tallies. + !! 
It is separate from tallyClerks and individual responses to allow:
+  !! -> Easy writing and (later) reading from file for archiving of results
+  !! -> Easy possibility of extension to tally higher moments of result
+  !! -> Possibility of extension to tally covariance of selected tally bins
+  !! -> Easy copying and recombination of results for OpenMP shared memory parallelism
+  !! -> Easy, output format-independent way to perform regression tests
+  !! -> Easy handling of different batch sizes
+  !!
+  !! For every bin index there are three positions: BIN, CSUM and CSUM2. All are initialised to 0.
+  !! For scoring, an array is created with dimension (Nbins,nThreads) to mitigate false sharing.
+  !! On reduction (reduceBins), this array is collapsed into the BIN position of the main bins array.
+  !!
+  !! Interface:
+  !!   init(N,idBS): Initialise with integer size N and integer id. Optional integer Batch Size.
+  !!
+  !!   kill(): Elemental. Return to uninitialised state.
+  !!
+  !!   score(score,idx): Score in the bin under idx. FatalError if idx is outside bounds. Score
+  !!     is defReal, shortInt or longInt
+  !!
+  !!   accumulate(score,idx): Accumulate result in cumulative sums in bin under idx. FatalError
+  !!     if idx is outside bounds. Score is defReal, shortInt or longInt.
+  !!
+  !!   getResult(mean, STD, idx, samples): Retrieve mean value and standard deviation of the
+  !!     estimate under idx. Use optional samples to specify number of estimates used to
+  !!     evaluate mean and STD from default, which is number of batches in score memory.
+  !!     STD is optional.
+  !!
+  !!   getScore(idx): Return current value of score from bin under idx. FatalError if idx is
+  !!     outside bounds.
+  !!
+  !!   closeBin(normFactor,idx): Multiplies score under bin by normFactor and accumulates it in
+  !!     cumulative sums. Then sets the bin to zero.
+  !!
+  !!   closeCycle(normFactor): Multiplies all scores by normFactor and accumulates them in
+  !!     cumulative sums. Sets all scores to zero.
+  !!
+  !!   lastCycle(): Return true if the next call to closeCycle will close a batch.
+  !!
+  !!   getBatchSize(): Returns number of cycles that constitute a single batch.
+  !!
+  !!   reduceBins(): Move the scores from parallelBins and from different processes into bins.
+  !!
+  !! Example use case:
+  !!
+  !!  do batches=1,20
+  !!    do hist=1,10
+  !!      call scoreMem % score(hist,1)      ! Score hist (1,10) in bin 1
+  !!      call scoreMem % accumulate(hist,2) ! Accumulate hist in CSUMs of bin 2
+  !!    end do
+  !!    call scoreMem % reduceBins()         ! Collapse scores into the main bins array
+  !!    call scoreMem % closeCycle(ONE)      ! Close batch without normalisation (factor = ONE)
+  !!  end do
+  !!
+  !!  call scoreMem % getResult(mean,STD,1) ! Get result from bin 1 with STD
+  !!  call scoreMem % getResult(mean,2,200) ! Get mean from bin 2 assuming 200 samples
+  !!
+  !! NOTE:  The following indexing is used in the bins class member
+  !!        bins(binIndex,binType) binType is BIN/CSUM/CSUM2
+  !! NOTE2: If the batch size does not divide the number of cycles scored, results accumulated
+  !!        in the extra cycles are discarded in the current implementation
+  !!
+  type, public :: scoreMemory
+      !private
+      real(defReal),dimension(:,:),allocatable :: bins          !! Space for storing cumul data (2nd dim size is always 3!)
+      real(defReal),dimension(:,:),allocatable :: parallelBins  !! Space for scoring for different threads
+      integer(longInt)                         :: N = 0         !! Size of memory (number of bins)
+      integer(shortInt)                        :: nThreads = 0  !! Number of threads used for parallelBins
+      integer(shortInt)                        :: id            !! Id of the tally
+      integer(shortInt)                        :: batchN = 0    !! Number of Batches
+      integer(shortInt)                        :: cycles = 0    !! Cycles counter
+      integer(shortInt)                        :: batchSize = 1 !! 
Batch interval size (in cycles) + contains + ! Interface procedures + procedure :: init + procedure :: kill + generic :: score => score_defReal, score_shortInt, score_longInt + generic :: accumulate => accumulate_defReal, accumulate_shortInt, accumulate_longInt + generic :: getResult => getResult_withSTD, getResult_withoutSTD + procedure :: getScore + procedure :: closeCycle + procedure :: closeBin + procedure :: lastCycle + procedure :: getBatchSize + procedure :: reduceBins + + ! Private procedures + procedure, private :: score_defReal + procedure, private :: score_shortInt + procedure, private :: score_longInt + procedure, private :: accumulate_defReal + procedure, private :: accumulate_shortInt + procedure, private :: accumulate_longInt + procedure, private :: getResult_withSTD + procedure, private :: getResult_withoutSTD + + end type scoreMemory + +contains + + !! + !! Allocate space for the bins given number of bins N + !! Optionaly change batchSize from 1 to any +ve number + !! + subroutine init(self, N, id, batchSize ) + class(scoreMemory),intent(inout) :: self + integer(longInt),intent(in) :: N + integer(shortInt),intent(in) :: id + integer(shortInt),optional,intent(in) :: batchSize + character(100), parameter :: Here= 'init (scoreMemory_class.f90)' + + ! Allocate space and zero all bins + allocate( self % bins(N, DIM2)) + self % bins = ZERO + + self % nThreads = ompGetMaxThreads() + + ! Note the array padding to avoid false sharing + allocate( self % parallelBins(N + array_pad, self % nThreads)) + self % parallelBins = ZERO + + ! Save size of memory + self % N = N + + ! Assign memory id + self % id = id + + ! Set batchN, cycles and batchSize to default values + self % batchN = 0 + self % cycles = 0 + self % batchSize = 1 + + if(present(batchSize)) then + if(batchSize > 0) then + self % batchSize = batchSize + else + call fatalError(Here,'Batch Size of: '// numToChar(batchSize) //' is invalid') + end if + end if + + end subroutine init + + !! + !! Deallocate memory and return to uninitialised state + !! + subroutine kill(self) + class(scoreMemory), intent(inout) :: self + + if(allocated(self % bins)) deallocate(self % bins) + if(allocated(self % parallelBins)) deallocate(self % parallelBins) + self % N = 0 + self % nThreads = 0 + self % batchN = 0 + + end subroutine kill + + !! + !! Score a result on a given single bin under idx + !! + subroutine score_defReal(self, score, idx) + class(scoreMemory), intent(inout) :: self + real(defReal), intent(in) :: score + integer(longInt), intent(in) :: idx + integer(shortInt) :: thread_idx + character(100),parameter :: Here = 'score_defReal (scoreMemory_class.f90)' + + ! Verify bounds for the index + if( idx < 0_longInt .or. idx > self % N) then + call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & + & memory with size '//numToChar(self % N)) + end if + + ! Add the score + thread_idx = ompGetThreadNum() + 1 + self % parallelBins(idx, thread_idx) = & + self % parallelBins(idx, thread_idx) + score + + end subroutine score_defReal + + !! + !! Score a result with shortInt on a given bin under idx + !! + subroutine score_shortInt(self, score, idx) + class(scoreMemory), intent(inout) :: self + integer(shortInt), intent(in) :: score + integer(longInt), intent(in) :: idx + + call self % score_defReal(real(score, defReal), idx) + + end subroutine score_shortInt + + !! + !! Score a result with longInt on a given bin under idx + !! 
+ subroutine score_longInt(self, score, idx) + class(scoreMemory), intent(inout) :: self + integer(longInt), intent(in) :: score + integer(longInt), intent(in) :: idx + + call self % score_defReal(real(score, defReal), idx) + + end subroutine score_longInt + + !! + !! Increment the result directly on cumulative sums + !! + subroutine accumulate_defReal(self, score, idx) + class(scoreMemory), intent(inout) :: self + real(defReal), intent(in) :: score + integer(longInt), intent(in) :: idx + character(100),parameter :: Here = 'accumulate_defReal (scoreMemory_class.f90)' + + ! Verify bounds for the index + if( idx < 0_longInt .or. idx > self % N) then + call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & + & memory with size '//numToChar(self % N)) + end if + + ! Add the score + self % bins(idx, CSUM) = self % bins(idx, CSUM) + score + self % bins(idx, CSUM2) = self % bins(idx, CSUM2) + score * score + + end subroutine accumulate_defReal + + !! + !! Increment the result directly on cumulative sums with shortInt score + !! + subroutine accumulate_shortInt(self, score, idx) + class(scoreMemory), intent(inout) :: self + integer(shortInt), intent(in) :: score + integer(longInt), intent(in) :: idx + + call self % accumulate_defReal(real(score, defReal), idx) + + end subroutine accumulate_shortInt + + !! + !! Increment the result directly on cumulative sums with longInt score + !! + subroutine accumulate_longInt(self, score, idx) + class(scoreMemory), intent(inout) :: self + integer(longInt), intent(in) :: score + integer(longInt), intent(in) :: idx + + call self % accumulate_defReal(real(score, defReal), idx) + + end subroutine accumulate_longInt + + !! + !! Close Cycle + !! Increments cycle counter and detects end-of-batch + !! When batch finishes it normalises all scores by the factor and moves them to CSUMs + !! + subroutine closeCycle(self, normFactor) + class(scoreMemory), intent(inout) :: self + real(defReal),intent(in) :: normFactor + integer(longInt) :: i + real(defReal), save :: res + !$omp threadprivate(res) + + ! Increment Cycle Counter + self % cycles = self % cycles + 1 + + if(mod(self % cycles, self % batchSize) == 0) then ! Close Batch + + !$omp parallel do + do i = 1, self % N + + ! Normalise scores + res = self % bins(i, BIN) * normFactor + + ! Zero all score bins + self % bins(i, BIN) = ZERO + + ! Increment cumulative sums + self % bins(i,CSUM) = self % bins(i,CSUM) + res + self % bins(i,CSUM2) = self % bins(i,CSUM2) + res * res + + end do + !$omp end parallel do + + ! Increment batch counter + self % batchN = self % batchN + 1 + + end if + + end subroutine closeCycle + + !! + !! Close Cycle + !! Multiplies score in bin under idx by normFactor, accumulates it and sets it to zero + !! + subroutine closeBin(self, normFactor, idx) + class(scoreMemory), intent(inout) :: self + real(defReal),intent(in) :: normFactor + integer(longInt), intent(in) :: idx + real(defReal) :: res + character(100),parameter :: Here = 'closeBin (scoreMemory_class.f90)' + + ! Verify bounds for the index + if( idx < 0_longInt .or. idx > self % N) then + call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & + & memory with size '//numToChar(self % N)) + end if + + ! Normalise score + res = self % bins(idx, BIN) * normFactor + + ! Increment cumulative sum + self % bins(idx,CSUM) = self % bins(idx,CSUM) + res + self % bins(idx,CSUM2) = self % bins(idx,CSUM2) + res * res + + ! Zero the score + self % bins(idx, BIN) = ZERO + + end subroutine closeBin + + + !! + !! 
Return true if next closeCycle will close a batch + !! + function lastCycle(self) result(isIt) + class(scoreMemory), intent(in) :: self + logical(defBool) :: isIt + + isIt = mod(self % cycles + 1, self % batchSize) == 0 + + end function lastCycle + + !! + !! Return batchSize + !! + pure function getBatchSize(self) result(S) + class(scoreMemory), intent(in) :: self + integer(shortInt) :: S + + S = self % batchSize + + end function getBatchSize + + !! + !! Combine the bins across threads and processes + !! + !! NOTE: + !! Need to be called before reporting CycleEnd to the clerks or calling closeCycle. + !! If it is not the case the results will be incorrect. This is not ideal design + !! and probably should be improved in the future. + !! + subroutine reduceBins(self) + class(scoreMemory), intent(inout) :: self + integer(longInt) :: i + + !$omp parallel do + do i = 1, self % N + self % bins(i, BIN) = sum(self % parallelBins(i,:)) + self % parallelBins(i,:) = ZERO + end do + !$omp end parallel do + + end subroutine reduceBins + + + !! + !! Load mean result and Standard deviation into provided arguments + !! Load from bin indicated by idx + !! Returns 0 if index is invalid + !! + elemental subroutine getResult_withSTD(self, mean, STD, idx, samples) + class(scoreMemory), intent(in) :: self + real(defReal), intent(out) :: mean + real(defReal),intent(out) :: STD + integer(longInt), intent(in) :: idx + integer(shortInt), intent(in),optional :: samples + integer(shortInt) :: N + real(defReal) :: inv_N, inv_Nm1 + + !! Verify index. Return 0 if not present + if( idx < 0_longInt .or. idx > self % N) then + mean = ZERO + STD = ZERO + return + end if + + ! Check if # of samples is provided + if( present(samples)) then + N = samples + else + N = self % batchN + end if + + ! Calculate mean + mean = self % bins(idx, CSUM) / N + + ! Calculate STD + inv_N = ONE / N + if( N /= 1) then + inv_Nm1 = ONE / (N - 1) + else + inv_Nm1 = ONE + end if + STD = self % bins(idx, CSUM2) *inv_N * inv_Nm1 - mean * mean * inv_Nm1 + STD = sqrt(STD) + + end subroutine getResult_withSTD + + !! + !! Load mean result provided argument + !! Load from bin indicated by idx + !! Returns 0 if index is invalid + !! + elemental subroutine getResult_withoutSTD(self, mean, idx, samples) + class(scoreMemory), intent(in) :: self + real(defReal), intent(out) :: mean + integer(longInt), intent(in) :: idx + integer(shortInt), intent(in),optional :: samples + integer(shortInt) :: N + + !! Verify index. Return 0 if not present + if( idx < 0_longInt .or. idx > self % N) then + mean = ZERO + return + end if + + ! Check if # of samples is provided + if( present(samples)) then + N = samples + else + N = self % batchN + end if + + ! Calculate mean + mean = self % bins(idx, CSUM) / N + + end subroutine getResult_withoutSTD + + !! + !! Obtain value of a score in a bin + !! Return ZERO for invalid bin address (idx) + !! + elemental function getScore(self, idx) result (score) + class(scoreMemory), intent(in) :: self + integer(longInt), intent(in) :: idx + real(defReal) :: score + + if(idx <= 0_longInt .or. idx > self % N) then + score = ZERO + else + score = self % bins(idx, BIN) + end if + + end function getScore + +end module scoreMemory_class diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90 index a81b530f0..a12663035 100644 --- a/Tallies/tallyAdmin_class.f90 +++ b/Tallies/tallyAdmin_class.f90 @@ -731,6 +731,9 @@ recursive subroutine reportCycleEnd(self,end) call reportCycleEnd(self % atch, end) end if + ! 
Reduce the scores across the threads and processes + call self % mem % reduceBins() + ! Go through all clerks that request the report !$omp parallel do do i=1,self % cycleEndClerks % getSize() From 3f1a34fb6adebaa63b54eb2d4052646f8c9a9920 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Thu, 1 Feb 2024 16:05:50 +0100 Subject: [PATCH 09/27] Add MPI reduction of score bins --- SharedModules/mpi_func.f90 | 24 ++++++++++++++++----- Tallies/scoreMemory_class.f90 | 30 ++++++++++++++++++++++++++- Tallies/tallyAdmin_class.f90 | 39 +++++++++++++++++++---------------- 3 files changed, 69 insertions(+), 24 deletions(-) diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90 index 45f9301db..313177899 100644 --- a/SharedModules/mpi_func.f90 +++ b/SharedModules/mpi_func.f90 @@ -3,10 +3,16 @@ module mpi_func #ifdef MPI use mpi_f08 #endif + use genericProcedures, only : numToChar + use errors_mod, only : fatalError implicit none integer(shortInt), private :: worldSize integer(shortInt), private :: rank + integer(shortInt), parameter :: MASTER_RANK = 0 + + !! Common MPI types + type(MPI_Datatype) :: MPI_DEFREAL contains @@ -18,12 +24,18 @@ module mpi_func subroutine mpiInit() #ifdef MPI integer(shortInt) :: ierr + call mpi_init(ierr) call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) + call mpi_type_create_f90_real(precision(1.0_defReal), range(1.0_defReal), & + MPI_DEFREAL, ierr) + + call mpi_type_commit(MPI_DEFREAL, ierr) + #else worldSize = 1 rank = 0 @@ -38,7 +50,9 @@ end subroutine mpiInit subroutine mpiFinalise() #ifdef MPI integer(shortInt) :: ierr - call MPI_Finalize(ierr) + + call mpi_finalize(ierr) + #endif end subroutine mpiFinalise @@ -58,12 +72,12 @@ end function getMPIWorldSize !! !! The master process is the one with rank 0 !! - function isMaster() - logical(defBool) :: isMaster + function isMPIMaster() + logical(defBool) :: isMPIMaster - isMaster = (rank == 0) + isMPIMaster = (rank == 0) - end function isMaster + end function isMPIMaster !! !! Get MPI rank diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index bec599de6..b0b54711f 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -1,6 +1,7 @@ module scoreMemory_class use numPrecision + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK use universalVariables, only : array_pad use genericProcedures, only : fatalError, numToChar use openmp_func, only : ompGetMaxThreads, ompGetThreadNum @@ -368,7 +369,9 @@ end function getBatchSize !! subroutine reduceBins(self) class(scoreMemory), intent(inout) :: self - integer(longInt) :: i + integer(longInt) :: i, start, chunk + integer(shortInt) :: error + character(100),parameter :: Here = 'reduceBins (scoreMemory_class.f90)' !$omp parallel do do i = 1, self % N @@ -377,6 +380,31 @@ subroutine reduceBins(self) end do !$omp end parallel do + ! Reduce across processes + ! We use the parallelBins array as a temporary storage +#ifdef MPI + ! Since the number of bins is limited by 64bit signed integer and the + ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need + ! 
to split the reduction operation into chunks
+    start = 1
+    chunk = min(self % N, huge(start))
+
+    do while (start <= self % N)
+      call mpi_reduce(self % bins(start : start + chunk - 1, BIN), &
+                      self % parallelBins(start : start + chunk - 1, 1),&
+                      int(chunk, shortInt), &
+                      MPI_DEFREAL, &
+                      MPI_SUM, &
+                      MASTER_RANK, &
+                      MPI_COMM_WORLD, &
+                      error)
+      start = start + chunk
+    end do
+
+    ! Copy the result back to bins
+    self % bins(:,BIN) = self % parallelBins(1:self % N, 1)
+
+#endif

   end subroutine reduceBins

diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90
index a12663035..05d4360c8 100644
--- a/Tallies/tallyAdmin_class.f90
+++ b/Tallies/tallyAdmin_class.f90
@@ -2,6 +2,7 @@ module tallyAdmin_class

   use numPrecision
   use tallyCodes
+  use mpi_func,          only : isMPIMaster
   use genericProcedures, only : fatalError, charCmp
   use dictionary_class,  only : dictionary
   use dynArray_class,    only : dynIntArray
@@ -734,30 +735,32 @@ recursive subroutine reportCycleEnd(self,end)
     ! Reduce the scores across the threads and processes
     call self % mem % reduceBins()

-    ! Go through all clerks that request the report
-    !$omp parallel do
-    do i=1,self % cycleEndClerks % getSize()
-      idx = self % cycleEndClerks % get(i)
-      call self % tallyClerks(idx) % reportCycleEnd(end, self % mem)
-    end do
-    !$omp end parallel do
+    if (isMPIMaster()) then
+      ! Go through all clerks that request the report
+      !$omp parallel do
+      do i=1,self % cycleEndClerks % getSize()
+        idx = self % cycleEndClerks % get(i)
+        call self % tallyClerks(idx) % reportCycleEnd(end, self % mem)
+      end do
+      !$omp end parallel do
+
+      ! Calculate normalisation factor
+      if( self % normBinAddr /= NO_NORM ) then
+        normScore = self % mem % getScore(self % normBinAddr)
+        if (normScore == ZERO) then
+          call fatalError(Here, 'Normalisation score from clerk: ' // self % normClerkName // ' is 0')

-    ! Calculate normalisation factor
-    if( self % normBInAddr /= NO_NORM ) then
-      normScore = self % mem % getScore(self % normBinAddr)
-      if (normScore == ZERO) then
-        call fatalError(Here, 'Normalisation score from clerk:' // self % normClerkName // 'is 0')
+        end if
+        normFactor = self % normValue / normScore
+      else
+        normFactor = ONE
       end if
-      normFactor = self % normValue / normScore
-    else
-      normFactor = ONE
+      ! Close cycle, multiplying all scores by the normalisation factor
+      call self % mem % closeCycle(normFactor)
     end if

-    ! Close cycle multipling all scores by multiplication factor
-    call self % mem % closeCycle(normFactor)
-
   end subroutine reportCycleEnd

   !!

From 911c2b946457baa17be253cefa44a144c06621b9 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Thu, 1 Feb 2024 18:33:55 +0100
Subject: [PATCH 10/27] Initial MPI support in fixed source calculation

Will be reproducible if fixes to source_init are merged.
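The work division is deterministic: each process takes (N + rank) / worldSize
particles, so the remainder goes to the highest ranks, and the starting offset
of a process equals the sum of the shares of all lower ranks. As a sanity
check on this arithmetic, a minimal, self-contained sketch (not part of this
patch; the program name is hypothetical) that verifies both identities:

    program worksharing_check
      implicit none
      ! Assumed example values: 10 particles over 4 processes
      integer, parameter :: N = 10, worldSize = 4
      integer :: rank, share, offset, covered

      covered = 0
      do rank = 0, worldSize - 1
        ! Same expressions as getWorkshare and getOffset in mpi_func.f90
        share  = (N + rank) / worldSize
        offset = N / worldSize * rank + max(0, mod(N, worldSize) + rank - worldSize)
        if (offset /= covered) stop 'offset does not match sum of lower shares'
        covered = covered + share
      end do
      if (covered /= N) stop 'shares do not sum to N'
      print *, 'Work division is consistent: every particle assigned exactly once'
    end program worksharing_check

For N = 10 and worldSize = 4 this gives shares (2, 2, 3, 3) with offsets
(0, 2, 4, 7).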
--- .../fixedSourcePhysicsPackage_class.f90 | 54 ++++++++++++------- SharedModules/mpi_func.f90 | 38 +++++++++++++ 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index b1dbb4e86..0f56f43cd 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -4,6 +4,8 @@ module fixedSourcePhysicsPackage_class use universalVariables use endfConstants use genericProcedures, only : fatalError, printFishLineR, numToChar, rotateVector + use mpi_func, only : isMPIMaster, getWorkshare, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD, & + MASTER_RANK, getOffset use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary use outputFile_class, only : outputFile @@ -80,6 +82,7 @@ module fixedSourcePhysicsPackage_class ! Settings integer(shortInt) :: N_cycles integer(shortInt) :: pop + integer(shortInt) :: totalPop character(pathLen) :: outputFile character(nameLen) :: outputFormat integer(shortInt) :: printSource = 0 @@ -116,7 +119,8 @@ subroutine run(self) print *, "/\/\ FIXED SOURCE CALCULATION /\/\" call self % cycles(self % tally, self % N_cycles) - call self % collectResults() + + if (isMPIMaster()) call self % collectResults() print * print *, "\/\/ END OF FIXED SOURCE CALCULATION \/\/" @@ -130,7 +134,7 @@ subroutine cycles(self, tally, N_cycles) class(fixedSourcePhysicsPackage), intent(inout) :: self type(tallyAdmin), pointer,intent(inout) :: tally integer(shortInt), intent(in) :: N_cycles - integer(shortInt) :: i, n, nParticles + integer(shortInt) :: i, n, nParticles, offset integer(shortInt), save :: j, bufferExtra type(particle), save :: p, transferP type(particleDungeon), save :: buffer @@ -155,6 +159,10 @@ subroutine cycles(self, tally, N_cycles) !$omp end parallel nParticles = self % pop + offset = getOffset(self % totalPop) + + ! Skip RNG state forward based on the process rank + call self % pRNG % stride(offset) ! Reset and start timer call timerReset(self % timerMain) @@ -235,7 +243,7 @@ subroutine cycles(self, tally, N_cycles) !$omp end parallel do ! Update RNG - call self % pRNG % stride(self % pop) + call self % pRNG % stride(self % totalPop) ! Send end of cycle report call tally % reportCycleEnd(self % thisCycle) @@ -249,14 +257,16 @@ subroutine cycles(self, tally, N_cycles) T_toEnd = max(ZERO, end_T - elapsed_T) ! 
Display progress - call printFishLineR(i) - print * - print *, 'Source batch: ', numToChar(i), ' of ', numToChar(N_cycles) - print *, 'Pop: ', numToChar(self % pop) - print *, 'Elapsed time: ', trim(secToChar(elapsed_T)) - print *, 'End time: ', trim(secToChar(end_T)) - print *, 'Time to end: ', trim(secToChar(T_toEnd)) - call tally % display() + if (isMPIMaster()) then + call printFishLineR(i) + print * + print *, 'Source batch: ', numToChar(i), ' of ', numToChar(N_cycles) + print *, 'Pop: ', numToChar(self % pop) + print *, 'Elapsed time: ', trim(secToChar(elapsed_T)) + print *, 'End time: ', trim(secToChar(end_T)) + print *, 'Time to end: ', trim(secToChar(T_toEnd)) + call tally % display() + end if end do end subroutine cycles @@ -272,20 +282,20 @@ subroutine collectResults(self) call out % init(self % outputFormat, filename=self % outputFile) name = 'seed' - call out % printValue(self % pRNG % getSeed(),name) + call out % printValue(self % pRNG % getSeed(), name) name = 'pop' - call out % printValue(self % pop,name) + call out % printValue(self % totalPop, name) name = 'Source_batches' - call out % printValue(self % N_cycles,name) + call out % printValue(self % N_cycles, name) call cpu_time(self % CPU_time_end) name = 'Total_CPU_Time' - call out % printValue((self % CPU_time_end - self % CPU_time_start),name) + call out % printValue((self % CPU_time_end - self % CPU_time_start), name) name = 'Transport_time' - call out % printValue(timerTime(self % timerMain),name) + call out % printValue(timerTime(self % timerMain), name) ! Print tally call self % tally % print(out) @@ -313,7 +323,9 @@ subroutine init(self, dict) call cpu_time(self % CPU_time_start) ! Read calculation settings - call dict % get( self % pop,'pop') + call dict % get( self % totalPop,'pop') + self % pop = getWorkshare(self % totalPop) + call dict % get( self % N_cycles,'cycles') call dict % get( nucData, 'XSdata') call dict % get( energy, 'dataType') @@ -354,9 +366,15 @@ subroutine init(self, dict) ! Obtain time string and hash it to obtain random seed call date_and_time(date, time) string = date // time - call FNV_1(string,seed_temp) + call FNV_1(string, seed_temp) end if + + ! Brodcast seed to all processes +#ifdef MPI + call MPI_Bcast(seed_temp, 1, MPI_INTEGER, MASTER_RANK, MPI_COMM_WORLD) +#endif + seed = seed_temp call self % pRNG % init(seed) diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90 index 313177899..44ffe9d97 100644 --- a/SharedModules/mpi_func.f90 +++ b/SharedModules/mpi_func.f90 @@ -56,6 +56,44 @@ subroutine mpiFinalise() #endif end subroutine mpiFinalise + !! + !! Get the share of work N for the current process + !! + !! Args: + !! N [in] -> Total measure of work (e.g. number of particles) + !! + !! Result: + !! The share of work for the current process + !! + function getWorkshare(N) result(share) + integer(shortInt), intent(in) :: N + integer(shortInt) :: share + + share = (N + rank) / worldSize + + end function getWorkshare + + !! + !! Get starting work offset for the current process + !! + !! Args: + !! N [in] -> Total measure of work (e.g. number of particles) + !! + !! Result: + !! The starting offset for the current process: offset = Sum_{i=0}^{rank-1} N_i + !! where N_i is the share of work for process i + !! 
+ function getOffset(N) result(offset) + integer(shortInt), intent(in) :: N + integer(shortInt) :: offset + integer(shortInt) :: remainder + + remainder = mod(N, worldSize) + offset = N / worldSize * rank + max(0, remainder + rank - worldSize) + + end function getOffset + + !! !! Get MPI world size !! From d21f4abbcbca00dc6b5da4932171d13cd693af79 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Fri, 2 Feb 2024 14:34:50 +0100 Subject: [PATCH 11/27] Add display_func module Allows to limit the console I/O to the master process only. Applied to fixed source calculation only at the moment. --- Apps/scone.f90 | 6 +- Geometry/csg_class.f90 | 55 ++++---- .../aceDatabase/aceNeutronDatabase_class.f90 | 40 +++--- PhysicsPackages/eigenPhysicsPackage_class.f90 | 3 +- .../fixedSourcePhysicsPackage_class.f90 | 56 ++++---- .../rayVolPhysicsPackage_class.f90 | 3 +- SharedModules/CMakeLists.txt | 1 + SharedModules/display_func.f90 | 127 ++++++++++++++++++ SharedModules/errors_mod.f90 | 7 +- SharedModules/genericProcedures.f90 | 56 +------- 10 files changed, 219 insertions(+), 135 deletions(-) create mode 100644 SharedModules/display_func.f90 diff --git a/Apps/scone.f90 b/Apps/scone.f90 index db04c818f..5b8cf96ac 100644 --- a/Apps/scone.f90 +++ b/Apps/scone.f90 @@ -1,7 +1,7 @@ program scone use numPrecision - use genericProcedures, only : printStart + use display_func, only : printStart, statusMsg use openmp_func, only : ompSetNumThreads use mpi_func, only : mpiInit, mpiFinalise use commandLineUI, only : getInputFile, clOptionIsPresent, addClOption, getFromCL @@ -63,6 +63,6 @@ program scone call mpiFinalise() - print *, 'Total calculation time: ', trim(secToChar(timerTime(timerIdx))) - print *, 'Have a good day and enjoy your result analysis!' + call statusMsg('Total calculation time: ' // trim(secToChar(timerTime(timerIdx)))) + call statusMsg('Have a good day and enjoy your result analysis!') end program scone diff --git a/Geometry/csg_class.f90 b/Geometry/csg_class.f90 index 9356344ae..2d5f88a1d 100644 --- a/Geometry/csg_class.f90 +++ b/Geometry/csg_class.f90 @@ -1,7 +1,8 @@ module csg_class use numPrecision - use universalVariables, only : MAX_COL, HARDCODED_MAX_NEST + use universalVariables, only : HARDCODED_MAX_NEST + use display_func, only : statusMsg, printSectionStart, printSectionEnd, printSeparatorLine use genericProcedures, only : fatalError, numToChar use dictionary_class, only : dictionary use charMap_class, only : charMap @@ -95,28 +96,28 @@ subroutine init(self, dict, mats, silent) ! Print beggining if (loud) then - print *, repeat('<>', MAX_COL/2) - print *, "/\/\ READING GEOMETRY /\/\" + call printSeparatorLine() + call printSectionStart("READING GEOMETRY") end if ! Build Surfaces - if (loud) print *, "Building Surfaces" + if (loud) call statusMsg("Building Surfaces") call self % surfs % init(dict % getDictPtr('surfaces')) - if (loud) print *, "DONE!" + if (loud) call statusMsg("DONE!") ! Build Cells - if (loud) print *, "Building Cells" + if (loud) call statusMsg("Building Cells") call self % cells % init(dict % getDictPtr('cells'), self % surfs, mats) - if (loud) print *, "DONE!" + if (loud) call statusMsg("DONE!") ! Build Universes - if(loud) print *, "Building Universes" + if(loud) call statusMsg("Building Universes") call self % unis % init(fills, & dict % getDictPtr('universes'),& self % cells, & self % surfs, & mats) - if (loud) print *, "DONE!" + if (loud) call statusMsg("DONE!") ! 
Select Root universe if (dict % isPresent('root')) then @@ -153,13 +154,13 @@ subroutine init(self, dict, mats, silent) call surf_ptr % setBC(BC) ! Check validity of geometry structure - if (loud) print *, "CHECKING GEOMETRY:" + if (loud) call statusMsg("CHECKING GEOMETRY:") ! Check for recursion if (fills % hasCycles()) then call fatalError(Here ,'There is recursion in the geometry nesting. & &Universe cannot contain itself below itself.') else if (loud) then - print '(2X, A)', "Recursion in definition - NOT PRESENT!" + call statusMsg(" Recursion in definition - NOT PRESENT!") end if ! Check maximum nesting @@ -168,39 +169,39 @@ subroutine init(self, dict, mats, silent) call fatalError(Here,'Nesting level: '// numToChar(nesting) //'> & & max nesting'//numToChar(HARDCODED_MAX_NEST)) else if (loud) then - print '(2X, A)', "Nesting level - FINE!" + call statusMsg(" Nesting level - FINE!") end if ! Check outside below root if (fills % nestedOutside()) then call fatalError(Here,'Cell with outside fill is present below root universe') else if (loud) then - print '(2X, A)', "Outside below root - NOT PRESENT!" + call statusMsg(" Outside below root - NOT PRESENT!") end if ! Build geometry Graph - if (loud) print *, "BUILDING GEOMETRY GRAPH" + if (loud) call statusMsg("BUILDING GEOMETRY GRAPH") call self % graph % init(fills, dict % getDictPtr('graph')) - if (loud) print *, "DONE!" + if (loud) call statusMsg("DONE!") ! Print geometry information if (loud) then - print *, "GEOMETRY INFORMATION " - print '(2X, 2A)', "Number of Surfaces: ", numToChar(self % surfs % getSize()) - print '(2X, 2A)', "Number of Cells: ", numToChar(self % cells % getSize()) - print '(2X, 2A)', "Number of Universes: ", numToChar(self % unis % getSize()) - print '(2X, 2A)', "Nesting Levels: ", numToChar(nesting) - print '(2X, 2A)', "Unique Cells: ", numToChar(self % graph % uniqueCells) - print '(2X, 2A)', "Unused universes (ID): ", numToChar(fills % unusedUniverses()) - print '(2X, 2A)', "Boundary Surface ID: ", numToChar(surf_ptr % id()) - print '(2X, 2A)', "Boundary Surface Type: ", surf_ptr % myType() - print '(2X, 2A)', "Boundary Conditions: ", numToChar(BC) + call statusMsg("GEOMETRY INFORMATION") + call statusMsg(" Number of Surfaces: " // numToChar(self % surfs % getSize())) + call statusMsg(" Number of Cells: " // numToChar(self % cells % getSize())) + call statusMsg(" Number of Universes: " // numToChar(self % unis % getSize())) + call statusMsg(" Nesting Levels: " // numToChar(nesting)) + call statusMsg(" Unique Cells: " // numToChar(self % graph % uniqueCells)) + call statusMsg(" Unused universes (ID): " // numToChar(fills % unusedUniverses())) + call statusMsg(" Boundary Surface ID: " // numToChar(surf_ptr % id())) + call statusMsg(" Boundary Surface Type: " // surf_ptr % myType()) + call statusMsg(" Boundary Conditions: " // numToChar(BC)) end if ! 
Print End if (loud) then - print *, "\/\/ FINISHED READING GEOMETRY \/\/" - print *, repeat('<>', MAX_COL/2) + call printSectionEnd("FINISHED READING GEOMETRY") + call printSeparatorLine end if end subroutine init diff --git a/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 b/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 index afcf57989..87f64c2a8 100644 --- a/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 +++ b/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 @@ -3,12 +3,12 @@ module aceNeutronDatabase_class use numPrecision use endfConstants use universalVariables - use errors_mod, only : fatalError - use genericProcedures, only : numToChar, removeDuplicatesSorted, binarySearch - use dictionary_class, only : dictionary - use RNG_class, only : RNG - use charMap_class, only : charMap - use intMap_class, only : intMap + use display_func, only : statusMsg + use genericProcedures, only : fatalError, numToChar + use dictionary_class, only : dictionary + use RNG_class, only : RNG + use charMap_class, only : charMap + use intMap_class, only : intMap ! Nuclear Data Interfaces use nuclearDatabase_inter, only : nuclearDatabase @@ -877,11 +877,11 @@ subroutine init(self, dict, ptr, silent ) end if if(loud) then - print '(A)', "Building: "// trim(name)// " with index: " //numToChar(nucIdx) - if (idx1 /= 0 .and. idx2 == 0) & - print '(A)', "including S(alpha,beta) table with file: " //trim(name_file1) - if (idx1 /= 0 .and. idx2 /= 0) & - print '(A)', "including S(alpha,beta) tables with files: " //trim(name_file1)//' '//trim(name_file2) + call statusMsg("Building: "// trim(name)// " with index: " //numToChar(nucIdx)) + if (idx /= 0 .and. idx2 == 0) & + call statusMsg("including S(alpha,beta) tables with file: " //trim(name_file)) + if (idx /= 0 .and. idx2 /= 0) & + call statusMsg("including S(alpha,beta) tables with files: " //trim(name_file1)//' '//trim(name_file2)) end if call new_neutronACE(ACE, name) @@ -931,13 +931,13 @@ subroutine init(self, dict, ptr, silent ) ! Loop over nuclides do j = 1, size(mat % nuclides) name = self % makeNuclideName(mat % nuclides(j)) - + ! Find nuclide definition to see if fissile ! Also used for checking stochastic mixing bounds nucIdxs(j) = nucSet % get(name) isFissileMat = isFissileMat .or. self % nuclides(nucIdxs(j)) % isFissile() - - ! Check to ensure stochastic mixing temperature + + ! Check to ensure stochastic mixing temperature ! is bounded by Sab temperatures if (mat % nuclides(j) % sabMix) then sabT = self % nuclides(nucIdxs(j)) % getSabTBounds() @@ -1030,7 +1030,7 @@ subroutine init(self, dict, ptr, silent ) end subroutine init !! - !! Makes a nuclide's name + !! Makes a nuclide's name !! Uniquely identifies nuclides with S(alpha,beta) data !! variants, including stochastic mixing !! @@ -1039,25 +1039,25 @@ function makeNuclideName(self, nuclide) result(name) type(nuclideInfo), intent(in) :: nuclide character(nameLen) :: name character(:), allocatable :: file - + name = trim(nuclide % toChar()) - ! Name is extended if there is S(alpha,beta) to + ! Name is extended if there is S(alpha,beta) to ! uniquely identify from data without thermal ! scattering if (nuclide % hasSab) then - + file = trim(nuclide % file_Sab1) name = trim(name) // '+' // file deallocate(file) - + ! 
Attach second Sab file for stochastic mixing if (nuclide % sabMix) then file = trim(nuclide % file_Sab2) name = trim(name) // '#' // file deallocate(file) end if - + end if end function makeNuclideName diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index 4ede0e8f4..991933fec 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -3,7 +3,8 @@ module eigenPhysicsPackage_class use numPrecision use universalVariables use endfConstants - use genericProcedures, only : fatalError, printFishLineR, numToChar, rotateVector + use genericProcedures, only : fatalError, numToChar, rotateVector + use display_func, only : printFishLineR use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary use outputFile_class, only : outputFile diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index 0f56f43cd..0d7f18558 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -3,9 +3,11 @@ module fixedSourcePhysicsPackage_class use numPrecision use universalVariables use endfConstants - use genericProcedures, only : fatalError, printFishLineR, numToChar, rotateVector + use genericProcedures, only : fatalError, numToChar, rotateVector + use display_func, only : printFishLineR, statusMsg, printSectionStart, printSectionEnd, & + printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD, & - MASTER_RANK, getOffset + MASTER_RANK, getOffset, getMPIRank use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary use outputFile_class, only : outputFile @@ -115,16 +117,17 @@ module fixedSourcePhysicsPackage_class subroutine run(self) class(fixedSourcePhysicsPackage), intent(inout) :: self - print *, repeat("<>",50) - print *, "/\/\ FIXED SOURCE CALCULATION /\/\" + call printSeparatorLine() + call printSectionStart("FIXED SOURCE CALCULATION") call self % cycles(self % tally, self % N_cycles) if (isMPIMaster()) call self % collectResults() - print * - print *, "\/\/ END OF FIXED SOURCE CALCULATION \/\/" - print * + call statusMsg("") + call printSectionEnd("END OF FIXED SOURCE CALCULATION") + call statusMsg("") + end subroutine !! @@ -257,16 +260,14 @@ subroutine cycles(self, tally, N_cycles) T_toEnd = max(ZERO, end_T - elapsed_T) ! Display progress - if (isMPIMaster()) then - call printFishLineR(i) - print * - print *, 'Source batch: ', numToChar(i), ' of ', numToChar(N_cycles) - print *, 'Pop: ', numToChar(self % pop) - print *, 'Elapsed time: ', trim(secToChar(elapsed_T)) - print *, 'End time: ', trim(secToChar(end_T)) - print *, 'Time to end: ', trim(secToChar(T_toEnd)) - call tally % display() - end if + call printFishLineR(i) + call statusMsg("") + call statusMsg("Source batch: " // numToChar(i) // " of " // numToChar(N_cycles)) + call statusMsg("Pop: " // numToChar(self % pop)) + call statusMsg("Elapsed time: " // trim(secToChar(elapsed_T))) + call statusMsg("End time: " // trim(secToChar(end_T))) + call statusMsg("Time to end: " // trim(secToChar(T_toEnd))) + call tally % display() end do end subroutine cycles @@ -396,11 +397,11 @@ subroutine init(self, dict) self % nucData => ndReg_get(self % particleType) ! Call visualisation - if (dict % isPresent('viz')) then - print *, "Initialising visualiser" + if (dict % isPresent('viz') .and. 
isMPIMaster()) then + call statusMsg("Initialising visualiser") tempDict => dict % getDictPtr('viz') call viz % init(self % geom, tempDict) - print *, "Constructing visualisation" + call statusMsg("Constructing visualisation") call viz % makeViz() call viz % kill() endif @@ -467,13 +468,14 @@ end subroutine kill subroutine printSettings(self) class(fixedSourcePhysicsPackage), intent(in) :: self - print *, repeat("<>",50) - print *, "/\/\ FIXED SOURCE CALCULATION /\/\" - print *, "Source batches: ", numToChar(self % N_cycles) - print *, "Population per batch: ", numToChar(self % pop) - print *, "Initial RNG Seed: ", numToChar(self % pRNG % getSeed()) - print * - print *, repeat("<>",50) + call printSeparatorLine() + call printSectionStart("FIXED SOURCE CALCULATION SETTINGS") + call statusMsg("Source batches: " // numToChar(self % N_cycles)) + call statusMsg("Population per batch: " // numToChar(self % pop)) + call statusMsg("Initial RNG Seed: " // numToChar(self % pRNG % getSeed())) + call statusMsg("") + call printSeparatorLine() + end subroutine printSettings end module fixedSourcePhysicsPackage_class diff --git a/PhysicsPackages/rayVolPhysicsPackage_class.f90 b/PhysicsPackages/rayVolPhysicsPackage_class.f90 index 26c7560d8..f89b1763d 100644 --- a/PhysicsPackages/rayVolPhysicsPackage_class.f90 +++ b/PhysicsPackages/rayVolPhysicsPackage_class.f90 @@ -2,7 +2,8 @@ module rayVolPhysicsPackage_class use numPrecision use universalVariables - use genericProcedures, only : fatalError, numToChar, rotateVector, printFishLineR + use genericProcedures, only : fatalError, numToChar, rotateVector + use display_func, only : printFishLineR use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary use rng_class, only : RNG diff --git a/SharedModules/CMakeLists.txt b/SharedModules/CMakeLists.txt index 552014699..803ecb85f 100644 --- a/SharedModules/CMakeLists.txt +++ b/SharedModules/CMakeLists.txt @@ -1,6 +1,7 @@ # Add Source Files to the global list add_sources( ./genericProcedures.f90 ./numPrecision.f90 + ./display_func.f90 ./endfConstants.f90 ./universalVariables.f90 ./hashFunctions_func.f90 diff --git a/SharedModules/display_func.f90 b/SharedModules/display_func.f90 new file mode 100644 index 000000000..ed0d69789 --- /dev/null +++ b/SharedModules/display_func.f90 @@ -0,0 +1,127 @@ +module display_func + use numPrecision + use universalVariables, only : MAX_COL + use iso_fortran_env, only : compiler_version + use mpi_func, only : isMPIMaster, getMPIWorldSize + use openmp_func, only : ompGetMaxThreads + implicit none + +contains + + !! + !! Prints Scone ACII Header + !! + subroutine printStart() + if (isMPIMaster()) then + print *, repeat(" ><((((*> ", MAX_COL / 10) + print *, '' + print * ," _____ __________ _ ________ " + print * ," / ___// ____/ __ \/ | / / ____/ " + print * ," \__ \/ / / / / / |/ / __/ " + print * ," ___/ / /___/ /_/ / /| / /___ " + print * ," /____/\____/\____/_/ |_/_____/ " + print * , '' + print * , '' + print * , "Compiler Info : ", compiler_version() +#ifdef _OPENMP + print '(A, I4)', " OpenMP Threads: ", ompGetMaxThreads() +#endif +#ifdef MPI + print '(A, I4)', " MPI Processes: ", getMPIWorldSize() +#endif + print * + print *, repeat(" <*((((>< ", MAX_COL / 10) + endif + ! TODO: Add extra info like date & time + + end subroutine printStart + + + !! + !! Prints line of fishes swimming right with an offset + !! + !! Args: + !! offset [in] : Number of characters to shift the line right + !! 
+ subroutine printFishLineR(offset) + integer(shortInt),intent(in) :: offset + integer(shortInt) :: offset_L + character(MAX_COL), dimension(10), parameter :: lines = [& + "" // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><((((*> ", & + " " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><((((*>",& + "> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><((((*",& + "*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><((((",& + "(*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><(((",& + "((*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><((",& + "(((*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><(",& + "((((*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " ><",& + "<((((*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " >",& + "><((((*> " // repeat(" ><((((*> ", MAX_COL / 10 - 1) // " "] + + offset_L = modulo(offset, 10) + + if (isMPIMaster()) then + print *, lines(offset_L+1) + endif + + end subroutine printFishLineR + + !! + !! Prints a message to the screen + !! + !! Needs to be used in place of `print *` to ensure that the message is only + !! printed by the master process when using MPI + !! + subroutine statusMsg(msg) + character(*), intent(in) :: msg + if (isMPIMaster()) then + print *, msg + endif + end subroutine statusMsg + + !! + !! Print a section start header + !! + !! " /\/\ Section Name /\/\" + !! + !! Args: + !! name [in] : Name of the section + !! + subroutine printSectionStart(name) + character(*), intent(in) :: name + + if (isMPIMaster()) then + print *, "/\/\ " // name // " /\/\" + endif + + end subroutine printSectionStart + + !! + !! Print a section end header + !! + !! " \/\/ Section Name \/\/" + !! + !! Args: + !! name [in] : Name of the section + !! + subroutine printSectionEnd(name) + character(*), intent(in) :: name + + if (isMPIMaster()) then + print *, "\/\/ " // name // " \/\/" + endif + + end subroutine printSectionEnd + + !! + !! Prints a separator line + !! + !! "<><><><><>..." to max column width + !! + subroutine printSeparatorLine() + if (isMPIMaster()) then + print *, repeat("<>", MAX_COL / 2) + endif + end subroutine printSeparatorLine + +end module display_func diff --git a/SharedModules/errors_mod.f90 b/SharedModules/errors_mod.f90 index 7e4b16bd6..b028e8870 100644 --- a/SharedModules/errors_mod.f90 +++ b/SharedModules/errors_mod.f90 @@ -9,6 +9,7 @@ module errors_mod use numPrecision use universalVariables, only : MAX_COL + use mpi_func, only : getMPIRank implicit none @@ -36,7 +37,9 @@ subroutine fatalError(where, why) ! Upper frame write(error_unit, *) repeat('<>', MAX_COL / 2) - +#ifdef MPI + write(error_unit, *) 'Process rank: ', getMPIRank() +#endif write(error_unit, *) 'Fatal has occurred in:' write(error_unit, *) where, new_line('') write(error_unit, *) 'Because:' @@ -63,6 +66,8 @@ subroutine fatalError(where, why) write(error_unit, *) repeat('<>', MAX_COL / 2) ! Terminate with backtrace + ! NOTE: We assume MPI implementation will terminate all processes if one of them + ! returns with an error code. error stop end subroutine fatalError diff --git a/SharedModules/genericProcedures.f90 b/SharedModules/genericProcedures.f90 index 04646b2d2..d319e848e 100644 --- a/SharedModules/genericProcedures.f90 +++ b/SharedModules/genericProcedures.f90 @@ -1,10 +1,6 @@ module genericProcedures - ! 
Intrinsic fortran Modules - use iso_fortran_env, only : compiler_version - use numPrecision - use openmp_func, only : ompGetMaxThreads - use errors_mod, only : fatalError + use errors_mod, only: fatalError use endfConstants use universalVariables @@ -1431,54 +1427,4 @@ function printParticleType(type) result(str) end select end function printParticleType - - !! - !! Prints Scone ACII Header - !! - subroutine printStart() - print *, repeat(" ><((((*> ",10) - print *, '' - print * ," _____ __________ _ ________ " - print * ," / ___// ____/ __ \/ | / / ____/ " - print * ," \__ \/ / / / / / |/ / __/ " - print * ," ___/ / /___/ /_/ / /| / /___ " - print * ," /____/\____/\____/_/ |_/_____/ " - print * , '' - print * , '' - print * , "Compiler Info : ", compiler_version() -#ifdef _OPENMP - print '(A, I4)', " OpenMP Threads: ", ompGetMaxThreads() -#endif - print * - print *, repeat(" <*((((>< ",10) - - ! TODO: Add extra info like date & time - - end subroutine printStart - - !! - !! Prints line of fishes swiming right with an offset - !! - subroutine printFishLineR(offset) - integer(shortInt),intent(in) :: offset - integer(shortInt) :: offset_L - character(100), parameter :: line = repeat(" ><((((*> ",10) - character(100),dimension(10), parameter :: lines = [ & - " ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> " ,& - " ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*>" ,& - "> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*" ,& - "*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((" ,& - "(*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><(((" ,& - "((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((" ,& - "(((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><(" ,& - "((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><" ,& - "<((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> >" ,& - "><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> ><((((*> " ] - - offset_L = modulo(offset,10) - - print *, lines(offset_L+1) - - end subroutine printFishLineR - end module genericProcedures From 4acb1400cf2a8b40f79a31c51ec94255a03576ed Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Sat, 17 Feb 2024 19:03:21 +0100 Subject: [PATCH 12/27] Add heap queue data structure It will be used for sampling without replacement. 
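To make the intended use concrete, a hypothetical, self-contained sketch
(mirroring the pattern in heapQueue_test below) that finds the threshold
selecting the K smallest values from a stream of N samples, assuming
numPrecision and this new module are compiled:

    program threshold_demo
      use numPrecision
      use heapQueue_class, only : heapQueue
      implicit none
      integer(shortInt), parameter :: K = 10, N = 1000
      type(heapQueue)   :: hq
      real(defReal)     :: r
      integer(shortInt) :: i

      call hq % init(K)
      call hq % pushReplace(1.0_defReal) ! Seed with an upper bound of the samples

      do i = 1, N
        call random_number(r)
        ! Only values below the current K-th smallest can enter the queue
        if (r < hq % maxValue()) call hq % pushReplace(r)
      end do

      ! maxValue is now a threshold that accepts exactly K of the N samples
      print *, 'Acceptance threshold:', hq % maxValue()
    end program threshold_demo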
--- DataStructures/CMakeLists.txt | 6 +- DataStructures/Tests/heapQueue_test.f90 | 64 +++++++++ DataStructures/heapQueue_class.f90 | 183 ++++++++++++++++++++++++ 3 files changed, 251 insertions(+), 2 deletions(-) create mode 100644 DataStructures/Tests/heapQueue_test.f90 create mode 100644 DataStructures/heapQueue_class.f90 diff --git a/DataStructures/CMakeLists.txt b/DataStructures/CMakeLists.txt index 6aa7a53af..58925bd8b 100644 --- a/DataStructures/CMakeLists.txt +++ b/DataStructures/CMakeLists.txt @@ -4,13 +4,15 @@ add_sources( ./dictionary_class.f90 ./charMap_class.f90 ./stack_class.f90 ./dynArray_class.f90 - ./dictParser_func.f90) + ./dictParser_func.f90 + ./heapQueue_class.f90) # Add Unit Tests to a global List add_unit_tests( ./Tests/dictionary_test.f90 ./Tests/intMap_test.f90 ./Tests/charMap_test.f90 ./Tests/dynArray_test.f90 - ./Tests/dictParser_test.f90) + ./Tests/dictParser_test.f90 + ./Tests/heapQueue_test.f90) add_integration_tests( ./Tests/dictParser_iTest.f90) diff --git a/DataStructures/Tests/heapQueue_test.f90 b/DataStructures/Tests/heapQueue_test.f90 new file mode 100644 index 000000000..c74206f48 --- /dev/null +++ b/DataStructures/Tests/heapQueue_test.f90 @@ -0,0 +1,64 @@ +module heapQueue_test + use numPrecision + use heapQueue_class, only: heapQueue + use pFUnit_mod + + implicit none + +contains + +!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> +!! PROPER TESTS BEGIN HERE +!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + + !! + !! Test with simple sequence without reaching maximum size + !! +@Test + subroutine testBelowMaximum() + type(heapQueue) :: hq + integer(shortInt) :: i + real(defReal), dimension(*), parameter :: seq = [2.0_defReal, 3.0_defReal, 1.0_defReal, 4.0_defReal, 5.0_defReal] + + call hq % init(8) + + @assertEqual(hq % getSize() , 0) + + do i = 1, size(seq) + call hq % pushReplace(seq(i)) + end do + + ! Check that the maximum value is the maximum value in the sequence + @assertEqual(hq % maxValue() , maxval(seq)) + @assertEqual(hq % getSize() , size(seq)) + + end subroutine testBelowMaximum + + !! + !! Test the intended use case + !! + !! It is not a very good test but I had no better idea at the moment [MAK] + !! + @Test + subroutine testAboveMaximum() + type(heapQueue) :: hq + integer(shortInt) :: i + real(defReal) :: val + real(defReal), dimension(*), parameter :: seq = [2.0_defReal, 3.0_defReal, 1.0_defReal, 1.4_defReal, 5.0_defReal] + + call hq % init(3) + + ! Push upper bound + call hq % pushReplace(1000.0_defReal) + + do i = 1, size(seq) + val = seq(i) + if (val < hq % maxValue()) call hq % pushReplace(seq(i)) + end do + + ! Check that the threshold is correct + @assertEqual(hq % maxValue() , 2.0_defReal) + + end subroutine testAboveMaximum + +end module heapQueue_test diff --git a/DataStructures/heapQueue_class.f90 b/DataStructures/heapQueue_class.f90 new file mode 100644 index 000000000..4f657dd33 --- /dev/null +++ b/DataStructures/heapQueue_class.f90 @@ -0,0 +1,183 @@ +module heapQueue_class + use numPrecision + use genericProcedures, only : swap + use errors_mod, only : fatalError + implicit none + private + + !! + !! A fixed size heap queue designed to store N smallest values of a sample + !! + !! The queue is implemented as a binary heap, that is a binary tree with the + !! heap property. The heap property is that the value of a parent node is + !! larger than the value of both its children. This means that the largest + !! 
value is always at the root of the tree.
+  !!
+  !! The queue is implemented as an array with the root at index 1. The children
+  !! of a node at index i are at 2*i and 2*i + 1. Note that for an even number
+  !! of elements the last node will only have one child.
+  !!
+  !! This data structure is intended to be used for sampling without replacement
+  !! as it allows finding the value of a threshold that selects K values from a
+  !! stream of N random numbers.
+  !!
+  !! Interface:
+  !!   init        -> Initialise the queue to a given size
+  !!   pushReplace -> Add a value to the queue, either growing the size or replacing
+  !!                  the largest element if the maximum size was reached
+  !!   maxValue    -> Returns the largest value in the queue
+  !!   getSize     -> Returns the current size of the queue
+  !!
+  type, public :: heapQueue
+    private
+    real(defReal), dimension(:), allocatable :: heap
+    integer(shortInt)                        :: size
+  contains
+    procedure :: init
+    procedure :: pushReplace
+    procedure :: maxValue
+    procedure :: getSize
+
+    procedure, private :: push
+    procedure, private :: replace
+  end type heapQueue
+
+contains
+
+  !!
+  !! Initialise the queue to a given size
+  !!
+  !! Args:
+  !!   maxSize [in] -> Maximum size of the queue
+  !!
+  subroutine init(self, maxSize)
+    class(heapQueue), intent(out) :: self
+    integer(shortInt), intent(in) :: maxSize
+
+    self % size = 0
+    allocate(self % heap(maxSize))
+
+  end subroutine init
+
+  !!
+  !! Add a value to the queue, either growing the size or replacing the largest
+  !!
+  !! Args:
+  !!   value [in] -> Value to add to the queue
+  !!
+  subroutine pushReplace(self, val)
+    class(heapQueue), intent(inout) :: self
+    real(defReal), intent(in)       :: val
+
+    if (self % size < size(self % heap)) then
+      call self % push(val)
+    else
+      call self % replace(val)
+    end if
+
+  end subroutine pushReplace
+
+  !!
+  !! Add a value to the queue
+  !!
+  !! Assumes enough space is available
+  !!
+  !! Args:
+  !!   value [in] -> Value to add to the queue
+  !!
+  subroutine push(self, val)
+    class(heapQueue), intent(inout) :: self
+    real(defReal), intent(in)       :: val
+    integer(shortInt)               :: parent, child
+
+    ! Increase the size of the queue and add the new value
+    self % size = self % size + 1
+    self % heap(self % size) = val
+
+    ! Sift the new value up the heap to restore the heap property
+    child = self % size
+    parent = child / 2
+
+    do while (child > 1 .and. self % heap(parent) < self % heap(child))
+      call swap(self % heap(parent), self % heap(child))
+      child = parent
+      parent = child / 2
+    end do
+
+  end subroutine push
+
+  !!
+  !! Replaces the largest value in the queue with a new value
+  !!
+  !! Args:
+  !!   value [in] -> Value to add to the queue
+  !!
+  subroutine replace(self, val)
+    class(heapQueue), intent(inout) :: self
+    real(defReal), intent(in)       :: val
+    integer(shortInt)               :: parent, child
+
+    self % heap(1) = val
+
+    parent = 1
+    child = 2
+
+    ! Sift down the new value until the heap property is restored by comparing
+    ! with the largest child and swapping if necessary
+    do while (child <= self % size)
+
+      ! We need to consider the case where there is only one child in a node
+      ! (can happen when the size is even). If child is the last element it is the
+      ! node with no sibling
+      if (child /= self % size .and. self % heap(child) < self % heap(child + 1)) then
+        child = child + 1
+      end if
+
+      ! If the parent is larger than the larger child we are done
+      if (self % heap(parent) >= self % heap(child)) then
+        return
+      end if
+
+      ! Otherwise swap the parent with the larger child and continue
+      ! 
the recursion + call swap(self % heap(parent), self % heap(child)) + parent = child + + ! Child points to the start of next level + child = parent * 2 + end do + + end subroutine replace + + !! + !! Returns the largest value in the queue + !! + !! Errors: + !! fatal error if the queue is empty + !! + function maxValue(self) result(val) + class(heapQueue), intent(in) :: self + real(defReal) :: val + character(100), parameter :: Here = "maxValue (heapQueue_class.f90)" + + if (self % size == 0) then + call fatalError(Here, "The queue is empty!") + end if + + val = self % heap(1) + + end function maxValue + + !! + !! Get the current size of the queue + !! + pure function getSize(self) result(size) + class(heapQueue), intent(in) :: self + integer(shortInt) :: size + + size = self % size + + end function getSize + + +end module heapQueue_class From 6ff2f01ad8107fdfe39bf3e5372090269ada01c8 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Tue, 20 Feb 2024 15:14:59 +0100 Subject: [PATCH 13/27] Remove circular dependencies --- SharedModules/errors_mod.f90 | 8 ++++---- SharedModules/mpi_func.f90 | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/SharedModules/errors_mod.f90 b/SharedModules/errors_mod.f90 index b028e8870..f924e7ab1 100644 --- a/SharedModules/errors_mod.f90 +++ b/SharedModules/errors_mod.f90 @@ -9,7 +9,7 @@ module errors_mod use numPrecision use universalVariables, only : MAX_COL - use mpi_func, only : getMPIRank + !use mpi_func, only : getMPIRank implicit none @@ -37,9 +37,9 @@ subroutine fatalError(where, why) ! Upper frame write(error_unit, *) repeat('<>', MAX_COL / 2) -#ifdef MPI - write(error_unit, *) 'Process rank: ', getMPIRank() -#endif +! #ifdef MPI +! write(error_unit, *) 'Process rank: ', getMPIRank() +! #endif write(error_unit, *) 'Fatal has occurred in:' write(error_unit, *) where, new_line('') write(error_unit, *) 'Because:' diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90 index 44ffe9d97..973a92ad1 100644 --- a/SharedModules/mpi_func.f90 +++ b/SharedModules/mpi_func.f90 @@ -3,7 +3,7 @@ module mpi_func #ifdef MPI use mpi_f08 #endif - use genericProcedures, only : numToChar + !use genericProcedures, only : numToChar use errors_mod, only : fatalError implicit none From 0392e7645a0fe0c455b4f301dca0a32113400be1 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Tue, 20 Feb 2024 15:15:17 +0100 Subject: [PATCH 14/27] Allow to get the current state of the RNG --- RandomNumbers/RNG_class.f90 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/RandomNumbers/RNG_class.f90 b/RandomNumbers/RNG_class.f90 index 2eeaa6ab8..2baf68aac 100644 --- a/RandomNumbers/RNG_class.f90 +++ b/RandomNumbers/RNG_class.f90 @@ -31,6 +31,7 @@ module rng_class procedure :: setSeed procedure :: getCount procedure :: getSeed + procedure :: currentState end type rng !! Parameters @@ -348,4 +349,15 @@ function getSeed(self) result(seed) end function getSeed + !! + !! Get current state of the RNG + !! 
+ function currentState(self) result(state) + class(rng), intent(in) :: self + integer(int64) :: state + + state = self % rngSeed + + end function currentState + end module rng_class From ee97c70e036af9e15b667e51a3bf8a75ace0845a Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Tue, 20 Feb 2024 15:15:45 +0100 Subject: [PATCH 15/27] Fix reproducibility of MPI FS simulation --- ParticleObjects/Source/source_inter.f90 | 8 +------- PhysicsPackages/fixedSourcePhysicsPackage_class.f90 | 4 ++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ParticleObjects/Source/source_inter.f90 b/ParticleObjects/Source/source_inter.f90 index 4bf415fa5..5e3c141f3 100644 --- a/ParticleObjects/Source/source_inter.f90 +++ b/ParticleObjects/Source/source_inter.f90 @@ -100,7 +100,6 @@ subroutine generate(self, dungeon, n, rand) type(particleDungeon), intent(inout) :: dungeon integer(shortInt), intent(in) :: n class(RNG), intent(in) :: rand - type(RNG) :: originalRNG type(RNG), save :: pRand integer(shortInt) :: i !$omp threadprivate(pRand) @@ -108,17 +107,12 @@ subroutine generate(self, dungeon, n, rand) ! Set dungeon size to begin call dungeon % setSize(n) - ! Move back in the sequence to avoid reusing few first random numbers - ! in transport - originalRNG = rand - call originalRNG % stride(-n) - ! Generate n particles to populate dungeon ! TODO: advance the rand after source generation! ! This should prevent reusing RNs during transport !$omp parallel do do i = 1, n - pRand = originalRNG + pRand = rand call pRand % stride(i) call dungeon % replace(self % sampleParticle(pRand), i) end do diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index 0d7f18558..dde670e16 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -175,6 +175,10 @@ subroutine cycles(self, tally, N_cycles) ! Send start of cycle report call self % fixedSource % generate(self % thisCycle, nParticles, self % pRNG) + + ! 
Update RNG after source generation
+    call self % pRNG % stride(self % totalPop)
+
     if(self % printSource == 1) then
       call self % thisCycle % printToFile(trim(self % outputFile)//'_source'//numToChar(i))
     end if

From adc1e62022a930b2e552a3591f93d3358d627b39 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Tue, 20 Feb 2024 16:58:22 +0100
Subject: [PATCH 16/27] Fix MPI-free compilation

---
 PhysicsPackages/fixedSourcePhysicsPackage_class.f90 | 6 ++++--
 SharedModules/mpi_func.f90                          | 2 ++
 Tallies/scoreMemory_class.f90                       | 7 ++++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
index dde670e16..9a2a06772 100644
--- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
+++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
@@ -6,8 +6,10 @@ module fixedSourcePhysicsPackage_class
   use genericProcedures, only : fatalError, numToChar, rotateVector
   use display_func,      only : printFishLineR, statusMsg, printSectionStart, printSectionEnd, &
                                 printSeparatorLine
-  use mpi_func,          only : isMPIMaster, getWorkshare, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD, &
-                                MASTER_RANK, getOffset, getMPIRank
+  use mpi_func,          only : isMPIMaster, getWorkshare, getOffset, getMPIRank
+#ifdef MPI
+  use mpi_func,          only : MASTER_RANK, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD
+#endif
   use hashFunctions_func, only : FNV_1
   use dictionary_class,   only : dictionary
   use outputFile_class,   only : outputFile
diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90
index 973a92ad1..5badff4bd 100644
--- a/SharedModules/mpi_func.f90
+++ b/SharedModules/mpi_func.f90
@@ -12,7 +12,9 @@ module mpi_func
   integer(shortInt), parameter :: MASTER_RANK = 0

   !! Common MPI types
+#ifdef MPI
   type(MPI_Datatype) :: MPI_DEFREAL
+#endif

 contains

diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90
index b0b54711f..3afa0e604 100644
--- a/Tallies/scoreMemory_class.f90
+++ b/Tallies/scoreMemory_class.f90
@@ -1,7 +1,9 @@
 module scoreMemory_class

   use numPrecision
+#ifdef MPI
   use mpi_func,           only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK
+#endif
   use universalVariables, only : array_pad
   use genericProcedures,  only : fatalError, numToChar
   use openmp_func,        only : ompGetMaxThreads, ompGetThreadNum
@@ -369,8 +371,11 @@ end function getBatchSize
   !!
   subroutine reduceBins(self)
     class(scoreMemory), intent(inout) :: self
-    integer(longInt)                  :: i, start, chunk
+    integer(longInt)                  :: i
+#ifdef MPI
+    integer(longInt)                  :: start, chunk
     integer(shortInt)                 :: error
+#endif
     character(100),parameter :: Here = 'reduceBins (scoreMemory_class.f90)'

From a233e0ce8abf03d6bbc11eea1aebac03b09fc8f4 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Tue, 20 Feb 2024 18:05:18 +0100
Subject: [PATCH 17/27] Make the MPI sync optional for scoreMemory

Results from non-master processes are not combined, hence they are lost at the moment.
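For reference, the new behaviour is driven from the tally dictionary. An
illustrative input fragment (clerk contents elided; the clerk name is a
placeholder), mirroring the sample documented in tallyAdmin_class.f90:

    tally {
      batchSize 4;
      mpiSync 1;   ! reduce score bins onto the master at each batch end (default 0)
      clerk1 { }
    }

Without mpiSync 1; each process keeps only its local batch statistics; as
noted above, the non-master results are lost until a collection step is added
later in this series.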
--- Tallies/scoreMemory_class.f90 | 62 ++++++++++++++++++++--------------- Tallies/tallyAdmin_class.f90 | 10 ++++-- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index 3afa0e604..a3d408ac9 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -2,7 +2,7 @@ module scoreMemory_class use numPrecision #ifdef MPI - use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK, getMPIWorldSize #endif use universalVariables, only : array_pad use genericProcedures, only : fatalError, numToChar @@ -85,14 +85,15 @@ module scoreMemory_class !! type, public :: scoreMemory !private - real(defReal),dimension(:,:),allocatable :: bins !! Space for storing cumul data (2nd dim size is always 2!) - real(defReal),dimension(:,:),allocatable :: parallelBins !! Space for scoring for different threads - integer(longInt) :: N = 0 !! Size of memory (number of bins) - integer(shortInt) :: nThreads = 0 !! Number of threads used for parallelBins - integer(shortInt) :: id !! Id of the tally - integer(shortInt) :: batchN = 0 !! Number of Batches - integer(shortInt) :: cycles = 0 !! Cycles counter - integer(shortInt) :: batchSize = 1 !! Batch interval size (in cycles) + real(defReal),dimension(:,:),allocatable :: bins !! Space for storing cumul data (2nd dim size is always 3!) + real(defReal),dimension(:,:),allocatable :: parallelBins !! Space for scoring for different threads + integer(longInt) :: N = 0 !! Size of memory (number of bins) + integer(shortInt) :: nThreads = 0 !! Number of threads used for parallelBins + integer(shortInt) :: id !! Id of the tally + integer(shortInt) :: batchN = 0 !! Number of Batches + integer(shortInt) :: cycles = 0 !! Cycles counter + integer(shortInt) :: batchSize = 1 !! Batch interval size (in cycles) + logical(defBool) :: reduced = .false. !! True if bins have been reduced contains ! Interface procedures procedure :: init @@ -125,11 +126,12 @@ module scoreMemory_class !! Allocate space for the bins given number of bins N !! Optionaly change batchSize from 1 to any +ve number !! - subroutine init(self, N, id, batchSize ) + subroutine init(self, N, id, batchSize, reduced) class(scoreMemory),intent(inout) :: self integer(longInt),intent(in) :: N integer(shortInt),intent(in) :: id integer(shortInt),optional,intent(in) :: batchSize + logical(defBool),optional,intent(in) :: reduced character(100), parameter :: Here= 'init (scoreMemory_class.f90)' ! Allocate space and zero all bins @@ -161,6 +163,10 @@ subroutine init(self, N, id, batchSize ) end if end if + if (present(reduced)) then + self % reduced = reduced + end if + end subroutine init !! @@ -391,24 +397,26 @@ subroutine reduceBins(self) ! Since the number of bins is limited by 64bit signed integer and the ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need ! to split the reduction operation into chunks - start = 1 - chunk = min(self % N, huge(start)) - - do while (start <= self % N) - call mpi_reduce(self % bins(start : start + chunk - 1, BIN), & - self % parallelBins(start : start + chunk - 1, 1),& - int(chunk, shortInt), & - MPI_DEFREAL, & - MPI_SUM, & - MASTER_RANK, & - MPI_COMM_WORLD, & - error) - start = start + chunk - end do - - ! 
Copy the result back to bins - self % bins(:,BIN) = self % parallelBins(1:self % N, 1) + !if (getMPIWorldSize() /= 1) then + if (self % reduced) then + start = 1 + chunk = min(self % N, huge(start)) + + do while (start <= self % N) + call mpi_reduce(self % bins(start : start + chunk - 1, BIN), & + self % parallelBins(start : start + chunk - 1, 1),& + int(chunk, shortInt), & + MPI_DEFREAL, & + MPI_SUM, & + MASTER_RANK, & + MPI_COMM_WORLD, & + error) + start = start + chunk + end do + ! Copy the result back to bins + self % bins(:,BIN) = self % parallelBins(1:self % N, 1) + end if #endif end subroutine reduceBins diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90 index 05d4360c8..77436bd8d 100644 --- a/Tallies/tallyAdmin_class.f90 +++ b/Tallies/tallyAdmin_class.f90 @@ -84,6 +84,7 @@ module tallyAdmin_class !! #norm clerk3; # ! Clerk should be size 1 (first bin of clerk is normalised) !! #normVal 13.0; # ! Must be present if "norm" is present !! #batchSize 4; # ! Default value 1 + !! #mpiSync 1; # ! Default value 0 !! clerk1 { } !! clerk2 { } !! clerk3 { } @@ -99,6 +100,7 @@ module tallyAdmin_class integer(longInt) :: normBinAddr = NO_NORM real(defReal) :: normValue character(nameLen) :: normClerkName + logical(defBool) :: mpiSync ! Clerks and clerks name map type(tallyClerkSlot),dimension(:),allocatable :: tallyClerks @@ -214,10 +216,14 @@ subroutine init(self,dict) ! Read batching size call dict % getOrDefault(cyclesPerBatch,'batchSize',1) + ! Check if the bins need to be synchronised across MPI processes + ! at the end of each batch + call dict % getOrDefault(self % mpiSync, 'mpiSync', .false.) + ! Initialise score memory ! Calculate required size. memSize = sum( self % tallyClerks % getSize() ) - call self % mem % init(memSize, 1, batchSize = cyclesPerBatch) + call self % mem % init(memSize, 1, batchSize = cyclesPerBatch, reduced = self % mpiSync) ! Assign memory locations to the clerks memLoc = 1 @@ -735,7 +741,7 @@ recursive subroutine reportCycleEnd(self,end) ! Reduce the scores across the threads and processes call self % mem % reduceBins() - if (isMPIMaster()) then + if (isMPIMaster() .or. .not. self % mpiSync ) then ! Go through all clerks that request the report !$omp parallel do do i=1,self % cycleEndClerks % getSize() From b3bd9c1ed339b0bd7fd427cafd9b6fb409313f8b Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Tue, 20 Feb 2024 20:16:59 +0100 Subject: [PATCH 18/27] Add default normalisation for results Fixes a bug for synchronised scoreMemory. The buffer value after transfer in parallelBin was not properly set to 0 again. --- Tallies/scoreMemory_class.f90 | 7 +++++-- Tallies/tallyAdmin_class.f90 | 19 ++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index a3d408ac9..879ba0428 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -2,7 +2,7 @@ module scoreMemory_class use numPrecision #ifdef MPI - use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK, getMPIWorldSize + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK #endif use universalVariables, only : array_pad use genericProcedures, only : fatalError, numToChar @@ -397,7 +397,6 @@ subroutine reduceBins(self) ! Since the number of bins is limited by 64bit signed integer and the ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need ! 
to split the reduction operation into chunks
-    !if (getMPIWorldSize() /= 1) then
     if (self % reduced) then
       start = 1
       chunk = min(self % N, huge(start))
@@ -416,6 +415,10 @@ subroutine reduceBins(self)

       ! Copy the result back to bins
       self % bins(:,BIN) = self % parallelBins(1:self % N, 1)
+
+      ! Clean buffer in parallel bin
+      self % parallelBins(1:self % N, 1) = ZERO
+
     end if
 #endif

   end subroutine reduceBins

diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90
index 77436bd8d..a2173261c 100644
--- a/Tallies/tallyAdmin_class.f90
+++ b/Tallies/tallyAdmin_class.f90
@@ -25,6 +25,7 @@ module tallyAdmin_class

   !! Parameters
   integer(longInt), parameter :: NO_NORM = -17_longInt
+  real(defReal), parameter    :: TARGET_START_WGT = 1.0e4_defReal

   !!
   !! TallyAdmin is responsible for:
@@ -100,6 +101,8 @@ module tallyAdmin_class
     integer(longInt)   :: normBinAddr = NO_NORM
     real(defReal)      :: normValue
     character(nameLen) :: normClerkName
+
+    ! Parallelisation settings
     logical(defBool)   :: mpiSync

     ! Clerks and clerks name map
@@ -214,7 +217,7 @@ subroutine init(self,dict)
     end if

     ! Read batching size
-    call dict % getOrDefault(cyclesPerBatch,'batchSize',1)
+    call dict % getOrDefault(cyclesPerBatch,'batchSize', 1)

     ! Check if the bins need to be synchronised across MPI processes
     ! at the end of each batch
@@ -222,11 +225,11 @@ subroutine init(self,dict)

     ! Initialise score memory
     ! Calculate required size.
-    memSize = sum( self % tallyClerks % getSize() )
+    memSize = sum( self % tallyClerks % getSize() ) + 1
     call self % mem % init(memSize, 1, batchSize = cyclesPerBatch, reduced = self % mpiSync)

     ! Assign memory locations to the clerks
-    memLoc = 1
+    memLoc = 2
     do i=1,size(self % tallyClerks)
       call self % tallyClerks(i) % setMemAddress(memLoc)
       memLoc = memLoc + self % tallyClerks(i) % getSize()
@@ -240,10 +243,13 @@ subroutine init(self,dict)

     ! Read name of normalisation clerks if present
     if(dict % isPresent('norm')) then
-      call dict % get(self % normClerkName,'norm')
-      call dict % get(self % normValue,'normVal')
+      call dict % get(self % normClerkName, 'norm')
+      call dict % get(self % normValue, 'normVal')
       i = self % clerksNameMap % get(self % normClerkName)
       self % normBinAddr = self % tallyClerks(i) % getMemAddress()
+    else
+      self % normBinAddr = 1
+      self % normValue = TARGET_START_WGT
     end if

   end subroutine init
@@ -693,6 +699,9 @@ recursive subroutine reportCycleStart(self, start)
     integer(shortInt), save                    :: idx
     !$omp threadprivate(idx)

+    ! Add the particles in this cycle to the starting population of the batch
+    call self % mem % score(start % popWeight(), 1_longInt)
+
     ! Call attachment
     if(associated(self % atch)) then
       call reportCycleStart(self % atch, start)

From 12cbdb61e2e4cb1b52d17944f876c9216774cd90 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Thu, 22 Feb 2024 12:57:23 +0100
Subject: [PATCH 19/27] Add option to collect distributed tally results

---
 .../fixedSourcePhysicsPackage_class.f90 |   3 +
 SharedModules/mpi_func.f90              |   6 +
 Tallies/scoreMemory_class.f90           | 121 ++++++++++++++----
 Tallies/tallyAdmin_class.f90            |  18 +++
 4 files changed, 124 insertions(+), 24 deletions(-)

diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
index 9a2a06772..2a3b43050 100644
--- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
+++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90
@@ -124,6 +124,9 @@ subroutine run(self)

     call self % cycles(self % tally, self % N_cycles)
+
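+    ! Note: the collection below is a collective MPI reduction, so every rank
+    ! must reach this call; only the master then writes the output files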
+    ! Collect results from other processes
+    call self % tally % collectDistributed()
+
     if (isMPIMaster()) call self % collectResults()

     call statusMsg("")

diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90
index 5badff4bd..53fe05998 100644
--- a/SharedModules/mpi_func.f90
+++ b/SharedModules/mpi_func.f90
@@ -14,6 +14,7 @@ module mpi_func
   !! Common MPI types
 #ifdef MPI
   type(MPI_Datatype) :: MPI_DEFREAL
+  type(MPI_Datatype) :: MPI_SHORTINT
 #endif

 contains
@@ -33,11 +34,16 @@ subroutine mpiInit()

     call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)

+    ! Define MPI Type for DEFREAL
     call mpi_type_create_f90_real(precision(1.0_defReal), range(1.0_defReal), &
                                   MPI_DEFREAL, ierr)
     call mpi_type_commit(MPI_DEFREAL, ierr)

+    ! Define MPI Type for SHORTINT
+    call mpi_type_create_f90_integer(range(1_shortInt), MPI_SHORTINT, ierr)
+    call mpi_type_commit(MPI_SHORTINT, ierr)
+
 #else
     worldSize = 1
     rank = 0

diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90
index 879ba0428..b835d41bd 100644
--- a/Tallies/scoreMemory_class.f90
+++ b/Tallies/scoreMemory_class.f90
@@ -2,7 +2,7 @@ module scoreMemory_class
   use numPrecision
 #ifdef MPI
-  use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK
+  use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK, isMPIMaster, MPI_SHORTINT
 #endif
   use universalVariables, only : array_pad
   use genericProcedures,  only : fatalError, numToChar
@@ -107,6 +107,7 @@ module scoreMemory_class
     procedure :: lastCycle
     procedure :: getBatchSize
     procedure :: reduceBins
+    procedure :: collectDistributed

     ! Private procedures
     procedure, private :: score_defReal
@@ -378,10 +379,6 @@ end function getBatchSize
   subroutine reduceBins(self)
     class(scoreMemory), intent(inout) :: self
     integer(longInt)                  :: i
-#ifdef MPI
-    integer(longInt)                  :: start, chunk
-    integer(shortInt)                 :: error
-#endif
     character(100),parameter :: Here = 'reduceBins (scoreMemory_class.f90)'

     !$omp parallel do
@@ -398,31 +395,107 @@ subroutine reduceBins(self)
     ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need
     ! to split the reduction operation into chunks
     if (self % reduced) then
-      start = 1
-      chunk = min(self % N, huge(start))
-
-      do while (start <= self % N)
-        call mpi_reduce(self % bins(start : start + chunk - 1, BIN), &
-                        self % parallelBins(start : start + chunk - 1, 1),&
-                        int(chunk, shortInt), &
-                        MPI_DEFREAL, &
-                        MPI_SUM, &
-                        MASTER_RANK, &
-                        MPI_COMM_WORLD, &
-                        error)
-        start = start + chunk
-      end do
+      call reduceArray(self % bins(:,BIN), self % parallelBins(:,1))
     end if
 #endif

   end subroutine reduceBins

+  !!
+  !! Reduce the accumulated results (csum and csum2) from different MPI processes
+  !!
+  !! If the bins are `reduced`, that is, scores are collected at the end of each
+  !! cycle, then this subroutine does nothing. Otherwise it collects the results
+  !! from different processes and stores them in the master process.
+  !!
+  !! The estimates from each process are treated as independent simulations, thus
+  !! the cumulative sums are added together and the batch count is summed.
+  !!
+  subroutine collectDistributed(self)
+    class(scoreMemory), intent(inout) :: self
+#ifdef MPI
+    integer(shortInt) :: error, buffer
+
+    if (.not. self % reduced) then
+      ! Reduce the batch count
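+      ! (The master ends up with the total batch count over all processes,
+      !  while the other ranks zero theirs so that a repeated collection
+      !  cannot double-count the batches)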
+      ! Note we need to use size 1 arrays to fit the interface of mpi_reduce, which expects
+      ! to be given arrays
+      call mpi_reduce(self % batchN, buffer, 1, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
+      if (isMPIMaster()) then
+        self % batchN = buffer
+      else
+        self % batchN = 0
+      end if
+
+      ! Reduce the cumulative sums
+      call reduceArray(self % bins(:,CSUM), self % parallelBins(:,1))
+      ! Reduce the cumulative sums of squares
+      call reduceArray(self % bins(:,CSUM2), self % parallelBins(:,1))
+    end if
+
+#endif
+
+  end subroutine collectDistributed
+
+  !!
+  !! Reduce the array across different processes
+  !!
+  !! Wrapper around MPI_Reduce to support arrays of defReal larger than 2^31
+  !! This subroutine is only defined if MPI is enabled
+  !!
+  !! Args:
+  !!   data [inout]   -> Array with the data to be reduced
+  !!   buffer [inout] -> Buffer to store the reduced data (must be same size or larger than data)
+  !!
+  !! Result:
+  !!   The sum of the data across all processes is stored in the master process `data`
+  !!   The buffer is set to ZERO on all processes (only the 1:size(data) range)!
+  !!
+  !! Errors:
+  !!   fatalError if size of the buffer is insufficient
+  !!
+#ifdef MPI
+  subroutine reduceArray(data, buffer)
+    real(defReal), dimension(:), intent(inout) :: data
+    real(defReal), dimension(:), intent(inout) :: buffer
+    integer(longInt)  :: N, chunk, start
+    integer(shortInt) :: error
+    character(100),parameter :: Here = 'reduceArray (scoreMemory_class.f90)'
+
+    ! We need to be careful to support sizes larger than 2^31
+    N = size(data, kind=longInt)
+
+    ! Check if the buffer is large enough
+    if (size(buffer, kind=longInt) < N) then
+      call fatalError(Here, 'Buffer is too small to store the reduced data')
+    end if
+
+    ! Since the number of bins is limited by 64bit signed integer and the
+    ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need
+    ! to split the reduction operation into chunks
+    start = 1
+
+    do while (start <= N)
+      chunk = min(N - start + 1, int(huge(1_shortInt), longInt))
+
+      call mpi_reduce(data(start : start + chunk - 1), &
+                      buffer(start : start + chunk - 1), &
+                      int(chunk, shortInt), &
+                      MPI_DEFREAL, &
+                      MPI_SUM, &
+                      MASTER_RANK, &
+                      MPI_COMM_WORLD, &
+                      error)
+      start = start + chunk
+    end do
+
+    ! Copy the result back to data
+    data = buffer(1:N)
+
+    ! Clean buffer
+    buffer(1:N) = ZERO
+
+  end subroutine reduceArray
+#endif

   !!
   !! Load mean result and Standard deviation into provided arguments

diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90
index a2173261c..063bef142 100644
--- a/Tallies/tallyAdmin_class.f90
+++ b/Tallies/tallyAdmin_class.f90
@@ -147,6 +147,7 @@ module tallyAdmin_class

     ! Interaction procedures
     procedure :: getResult
+    procedure :: collectDistributed

     ! Display procedures
     procedure :: display
@@ -815,6 +816,23 @@ pure subroutine getResult(self, res, name)

   end subroutine getResult

+  !!
+  !! Collect tally results distributed over multiple MPI processes
+  !!
+  recursive subroutine collectDistributed(self)
+    class(tallyAdmin), intent(inout) :: self
+
+    ! Call attachment
+    if(associated(self % atch)) then
+      call collectDistributed(self % atch)
+    end if
+
+    if (.not. self % mpiSync) then
+      call self % mem % collectDistributed()
+    end if
+
+  end subroutine collectDistributed
+
   !!
   !! Append sorting array identified with the code with tallyClerk idx
   !!
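Taken together, the three patches above give scoreMemory two MPI modes. A
minimal sketch of the resulting flow (illustrative only; it is not part of the
patch set and assumes the scoreMemory interface defined above -- `memSize`,
`cyclesPerBatch` and `mpiSync` are stand-ins for values read from the tally
dictionary):

    ! Every rank scores into its own scoreMemory `mem`
    call mem % init(memSize, 1, batchSize = cyclesPerBatch, reduced = mpiSync)

    do i = 1, N_cycles
      ! ... transport and scoring on this rank ...
      call mem % reduceBins()   ! with mpiSync: BIN column summed onto the master
    end do                      ! without it: only the OpenMP bins are folded

    ! With mpiSync this is a no-op; otherwise each rank kept independent
    ! cumulative sums, and they are merged once onto the master here
    call mem % collectDistributed()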
From 35ef86d660aa976b29eb900746530369040eb975 Mon Sep 17 00:00:00 2001 From: Mikolaj-A-Kowalski Date: Thu, 22 Feb 2024 14:53:41 +0100 Subject: [PATCH 20/27] Use statusMsg for display output of tallyClerks --- Tallies/TallyClerks/collisionClerk_class.f90 | 3 +- .../collisionProbabilityClerk_class.f90 | 3 +- .../TallyClerks/dancoffBellClerk_class.f90 | 6 +- Tallies/TallyClerks/keffAnalogClerk_class.f90 | 6 +- .../TallyClerks/keffImplicitClerk_class.f90 | 7 +- Tallies/TallyClerks/mgXsClerk_class.f90 | 3 +- .../TallyClerks/shannonEntropyClerk_class.f90 | 3 +- Tallies/TallyClerks/simpleFMClerk_class.f90 | 3 +- Tallies/TallyClerks/trackClerk_class.f90 | 585 +++++++++--------- 9 files changed, 318 insertions(+), 301 deletions(-) diff --git a/Tallies/TallyClerks/collisionClerk_class.f90 b/Tallies/TallyClerks/collisionClerk_class.f90 index 67cec49af..88847d3b8 100644 --- a/Tallies/TallyClerks/collisionClerk_class.f90 +++ b/Tallies/TallyClerks/collisionClerk_class.f90 @@ -4,6 +4,7 @@ module collisionClerk_class use tallyCodes use universalVariables use genericProcedures, only : fatalError + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle, particleState use outputFile_class, only : outputFile @@ -247,7 +248,7 @@ subroutine display(self, mem) class(collisionClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem - print *, 'collisionClerk does not support display yet' + call statusMsg('collisionClerk does not support display yet') end subroutine display diff --git a/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 b/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 index b6182d5cb..9350b2df6 100644 --- a/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 +++ b/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 @@ -3,6 +3,7 @@ module collisionProbabilityClerk_class use numPrecision use tallyCodes use genericProcedures, only : fatalError + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon @@ -332,7 +333,7 @@ subroutine display(self, mem) class(collisionProbabilityClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem - print *, 'collisionProbabilityClerk does not support display yet' + call statusMsg('collisionProbabilityClerk does not support display yet') end subroutine display diff --git a/Tallies/TallyClerks/dancoffBellClerk_class.f90 b/Tallies/TallyClerks/dancoffBellClerk_class.f90 index 3cf626505..f1f85eaf7 100644 --- a/Tallies/TallyClerks/dancoffBellClerk_class.f90 +++ b/Tallies/TallyClerks/dancoffBellClerk_class.f90 @@ -3,7 +3,9 @@ module dancoffBellClerk_class use numPrecision use tallyCodes use endfConstants + use universalVariables, only : MAX_COL use genericProcedures, only : fatalError, hasDuplicates + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon @@ -252,11 +254,13 @@ subroutine display(self, mem) class(dancoffBellClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem real(defReal) :: mean, STD + character(MAX_COL) :: buffer call mem % getResult(mean, STD, self % getMemAddress() + D_EFF) ! 
Print to console - print '(A,ES15.5,A,ES15.5)', 'Dancoff-Bell: ', mean, ' +/- ', STD + write (buffer, '(A,ES15.5,A,ES15.5)') 'Dancoff-Bell: ', mean, ' +/- ', STD + call statusMsg(buffer) end subroutine display diff --git a/Tallies/TallyClerks/keffAnalogClerk_class.f90 b/Tallies/TallyClerks/keffAnalogClerk_class.f90 index 0d24c1ff7..c4a4df264 100644 --- a/Tallies/TallyClerks/keffAnalogClerk_class.f90 +++ b/Tallies/TallyClerks/keffAnalogClerk_class.f90 @@ -2,8 +2,10 @@ module keffAnalogClerk_class use numPrecision use tallyCodes + use universalVariables, only : MAX_COL use dictionary_class, only : dictionary use genericProcedures, only : fatalError + use display_func, only : statusMsg use particle_class, only : particle use particleDungeon_class, only : particleDungeon use outputFile_class, only : outputFile @@ -177,11 +179,13 @@ subroutine display(self, mem) class(keffAnalogClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem real(defReal) :: k, STD + character(MAX_COL) :: buffer call mem % getResult(k, STD, self % getMemAddress()) ! Print estimates to a console - print '(A,F8.5,A,F8.5)', 'k-eff (analog): ', k, ' +/- ', STD + write(buffer, '(A,F8.5,A,F8.5)') 'k-eff (analog): ', k, ' +/- ', STD + call statusMsg(buffer) end subroutine display diff --git a/Tallies/TallyClerks/keffImplicitClerk_class.f90 b/Tallies/TallyClerks/keffImplicitClerk_class.f90 index 008c50e02..7ff26dd76 100644 --- a/Tallies/TallyClerks/keffImplicitClerk_class.f90 +++ b/Tallies/TallyClerks/keffImplicitClerk_class.f90 @@ -3,8 +3,9 @@ module keffImplicitClerk_class use numPrecision use tallyCodes use endfConstants - use universalVariables + use universalVariables, only : MAX_COL use genericProcedures, only : fatalError, charCmp + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle use particleDungeon_class, only : particleDungeon @@ -314,12 +315,14 @@ subroutine display(self, mem) class(keffImplicitClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem real(defReal) :: k, STD + character(MAX_COL) :: buffer ! Get current k-eff estimate call mem % getResult(k, STD, self % getMemAddress() + K_EFF) ! 
Print to console - print '(A,F8.5,A,F8.5)', 'k-eff (implicit): ', k, ' +/- ', STD + write (buffer, '(A,F8.5,A,F8.5)') 'k-eff (implicit): ', k, ' +/- ', STD + call statusMsg(buffer) end subroutine display diff --git a/Tallies/TallyClerks/mgXsClerk_class.f90 b/Tallies/TallyClerks/mgXsClerk_class.f90 index 80ea66204..2048f9caf 100644 --- a/Tallies/TallyClerks/mgXsClerk_class.f90 +++ b/Tallies/TallyClerks/mgXsClerk_class.f90 @@ -5,6 +5,7 @@ module mgXsClerk_class use endfConstants use universalVariables use genericProcedures, only : fatalError + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon @@ -915,7 +916,7 @@ subroutine display(self, mem) class(mgXsClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem - print *, 'mgXsClerk does not support display yet' + call statusMsg('mgXsClerk does not support display yet') end subroutine display diff --git a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 index 40e293280..9202f6418 100644 --- a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 +++ b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 @@ -3,6 +3,7 @@ module shannonEntropyClerk_class use numPrecision use tallyCodes use genericProcedures, only : fatalError + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon @@ -164,7 +165,7 @@ subroutine display(self, mem) class(shannonEntropyClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem - print *, 'shannonEntropyClerk does not support display yet' + call statusMsg('shannonEntropyClerk does not support display yet') end subroutine display diff --git a/Tallies/TallyClerks/simpleFMClerk_class.f90 b/Tallies/TallyClerks/simpleFMClerk_class.f90 index a4e026aee..e52aeb57b 100644 --- a/Tallies/TallyClerks/simpleFMClerk_class.f90 +++ b/Tallies/TallyClerks/simpleFMClerk_class.f90 @@ -5,6 +5,7 @@ module simpleFMClerk_class use endfConstants use universalVariables use genericProcedures, only : fatalError + use display_func, only : statusMsg use dictionary_class, only : dictionary use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon @@ -354,7 +355,7 @@ subroutine display(self, mem) class(simpleFMClerk), intent(in) :: self type(scoreMemory), intent(in) :: mem - print *, 'simpleFMClerk does not support display yet' + call statusMsg('simpleFMClerk does not support display yet') end subroutine display diff --git a/Tallies/TallyClerks/trackClerk_class.f90 b/Tallies/TallyClerks/trackClerk_class.f90 index 70ff771a0..6b624da31 100644 --- a/Tallies/TallyClerks/trackClerk_class.f90 +++ b/Tallies/TallyClerks/trackClerk_class.f90 @@ -1,292 +1,293 @@ -module trackClerk_class - - use numPrecision - use tallyCodes - use genericProcedures, only : fatalError - use dictionary_class, only : dictionary - use particle_class, only : particle, particleState - use outputFile_class, only : outputFile - use scoreMemory_class, only : scoreMemory - use tallyClerk_inter, only : tallyClerk, kill_super => kill - - ! Nuclear Data interface - use nuclearDatabase_inter, only : nuclearDatabase - - ! Tally Filters - use tallyFilter_inter, only : tallyFilter - use tallyFilterFactory_func, only : new_tallyFilter - - ! Tally Maps - use tallyMap_inter, only : tallyMap - use tallyMapFactory_func, only : new_tallyMap - - ! 
Tally Responses - use tallyResponseSlot_class, only : tallyResponseSlot - - implicit none - private - - !! - !! Track length estimator of reaction rates - !! Calculates flux weighted integrals from paticles travelled paths - !! - !! Private Members: - !! filter -> Space to store tally Filter - !! map -> Space to store tally Map - !! response -> Array of responses - !! width -> Number of responses (# of result bins for each map position) - !! - !! NOTE that maps and filters refer to the pre-transition particle state! This - !! would lead to wrong results in case of spatial grids (e.g. a Cartesian x-y grid) - !! - !! Interface - !! tallyClerk Interface - !! - !! SAMPLE DICTIOANRY INPUT: - !! - !! myTrackClerk { - !! type trackClerk; - !! # filter { } # - !! # map { } # - !! response (resName1 #resName2 ... #) - !! resName1 { } - !! #resNamew { run-time procedures - procedure :: reportPath - - ! Output procedures - procedure :: display - procedure :: print - - end type trackClerk - -contains - - !! - !! Initialise clerk from dictionary and name - !! - !! See tallyClerk_inter for details - !! - subroutine init(self, dict, name) - class(trackClerk), intent(inout) :: self - class(dictionary), intent(in) :: dict - character(nameLen), intent(in) :: name - character(nameLen),dimension(:),allocatable :: responseNames - integer(shortInt) :: i - - ! Assign name - call self % setName(name) - - ! Load filetr - if( dict % isPresent('filter')) then - call new_tallyFilter(self % filter, dict % getDictPtr('filter')) - end if - - ! Load map - if( dict % isPresent('map')) then - call new_tallyMap(self % map, dict % getDictPtr('map')) - end if - - ! Get names of response dictionaries - call dict % get(responseNames,'response') - - ! Load responses - allocate(self % response(size(responseNames))) - do i=1, size(responseNames) - call self % response(i) % init(dict % getDictPtr( responseNames(i) )) - end do - - ! Set width - self % width = size(responseNames) - - end subroutine init - - !! - !! Return to uninitialised state - !! - elemental subroutine kill(self) - class(trackClerk), intent(inout) :: self - - ! Superclass - call kill_super(self) - - ! Kill and deallocate filter - if(allocated(self % filter)) then - deallocate(self % filter) - end if - - ! Kill and deallocate map - if(allocated(self % map)) then - call self % map % kill() - deallocate(self % map) - end if - - ! Kill and deallocate responses - if(allocated(self % response)) then - deallocate(self % response) - end if - - self % width = 0 - - end subroutine kill - - !! - !! Returns array of codes that represent diffrent reports - !! - !! See tallyClerk_inter for details - !! - function validReports(self) result(validCodes) - class(trackClerk),intent(in) :: self - integer(shortInt),dimension(:),allocatable :: validCodes - - validCodes = [path_CODE] - - end function validReports - - !! - !! Return memory size of the clerk - !! - !! See tallyClerk_inter for details - !! - elemental function getSize(self) result(S) - class(trackClerk), intent(in) :: self - integer(shortInt) :: S - - S = size(self % response) - if(allocated(self % map)) S = S * self % map % bins(0) - - end function getSize - - !! - !! Process incoming track length report - !! - !! See tallyClerk_inter for details - !! 
- subroutine reportPath(self, p, L, xsData,mem) - class(trackClerk), intent(inout) :: self - class(particle), intent(in) :: p - real(defReal), intent(in) :: L - class(nuclearDatabase), intent(inout) :: xsData - type(scoreMemory), intent(inout) :: mem - type(particleState) :: state - type(particle) :: pTmp - integer(shortInt) :: binIdx, i - integer(longInt) :: adrr - real(defReal) :: scoreVal, flx - character(100), parameter :: Here =' reportPath (trackClerk_class.f90)' - - ! Get pre-transition particle state - state = p % prePath - - ! Check if within filter - if(allocated( self % filter)) then - if(self % filter % isFail(state)) return - end if - - ! Find bin index - if(allocated(self % map)) then - binIdx = self % map % map(state) - else - binIdx = 1 - end if - - ! Return if invalid bin index - if (binIdx == 0) return - - ! Calculate bin address - adrr = self % getMemAddress() + self % width * (binIdx -1) - 1 - - ! tranfer information about Prestate material to a temporary particle - pTmp = p - pTmp % coords % matIdx = state % matIdx - - ! Calculate flux sample L = path travelled - flx = L - - ! Append all bins - do i=1,self % width - scoreVal = self % response(i) % get(pTmp, xsData) * p % w * flx - call mem % score(scoreVal, adrr + i) - end do - - end subroutine reportPath - - !! - !! Display convergance progress on the console - !! - !! See tallyClerk_inter for details - !! - subroutine display(self, mem) - class(trackClerk), intent(in) :: self - type(scoreMemory), intent(in) :: mem - - print *, 'trackClerk does not support display yet' - - end subroutine display - - !! - !! Write contents of the clerk to output file - !! - !! See tallyClerk_inter for details - !! - subroutine print(self, outFile, mem) - class(trackClerk), intent(in) :: self - class(outputFile), intent(inout) :: outFile - type(scoreMemory), intent(in) :: mem - real(defReal) :: val, std - integer(shortInt) :: i - integer(shortInt),dimension(:),allocatable :: resArrayShape - character(nameLen) :: name - - ! Begin block - call outFile % startBlock(self % getName()) - - ! If track clerk has map print map information - if( allocated(self % map)) then - call self % map % print(outFile) - end if - - ! Write results. - ! Get shape of result array - if(allocated(self % map)) then - resArrayShape = [size(self % response), self % map % binArrayShape()] - else - resArrayShape = [size(self % response)] - end if - - ! Start array - name ='Res' - call outFile % startArray(name, resArrayShape) - - ! Print results to the file - do i=1,product(resArrayShape) - call mem % getResult(val, std, self % getMemAddress() - 1 + i) - call outFile % addResult(val,std) - - end do - - call outFile % endArray() - call outFile % endBlock() - - end subroutine print - -end module trackClerk_class +module trackClerk_class + + use numPrecision + use tallyCodes + use genericProcedures, only : fatalError + use display_func, only : statusMsg + use dictionary_class, only : dictionary + use particle_class, only : particle, particleState + use outputFile_class, only : outputFile + use scoreMemory_class, only : scoreMemory + use tallyClerk_inter, only : tallyClerk, kill_super => kill + + ! Nuclear Data interface + use nuclearDatabase_inter, only : nuclearDatabase + + ! Tally Filters + use tallyFilter_inter, only : tallyFilter + use tallyFilterFactory_func, only : new_tallyFilter + + ! Tally Maps + use tallyMap_inter, only : tallyMap + use tallyMapFactory_func, only : new_tallyMap + + ! 
Tally Responses + use tallyResponseSlot_class, only : tallyResponseSlot + + implicit none + private + + !! + !! Track length estimator of reaction rates + !! Calculates flux weighted integrals from paticles travelled paths + !! + !! Private Members: + !! filter -> Space to store tally Filter + !! map -> Space to store tally Map + !! response -> Array of responses + !! width -> Number of responses (# of result bins for each map position) + !! + !! NOTE that maps and filters refer to the pre-transition particle state! This + !! would lead to wrong results in case of spatial grids (e.g. a Cartesian x-y grid) + !! + !! Interface + !! tallyClerk Interface + !! + !! SAMPLE DICTIOANRY INPUT: + !! + !! myTrackClerk { + !! type trackClerk; + !! # filter { } # + !! # map { } # + !! response (resName1 #resName2 ... #) + !! resName1 { } + !! #resNamew { run-time procedures + procedure :: reportPath + + ! Output procedures + procedure :: display + procedure :: print + + end type trackClerk + +contains + + !! + !! Initialise clerk from dictionary and name + !! + !! See tallyClerk_inter for details + !! + subroutine init(self, dict, name) + class(trackClerk), intent(inout) :: self + class(dictionary), intent(in) :: dict + character(nameLen), intent(in) :: name + character(nameLen),dimension(:),allocatable :: responseNames + integer(shortInt) :: i + + ! Assign name + call self % setName(name) + + ! Load filetr + if( dict % isPresent('filter')) then + call new_tallyFilter(self % filter, dict % getDictPtr('filter')) + end if + + ! Load map + if( dict % isPresent('map')) then + call new_tallyMap(self % map, dict % getDictPtr('map')) + end if + + ! Get names of response dictionaries + call dict % get(responseNames,'response') + + ! Load responses + allocate(self % response(size(responseNames))) + do i=1, size(responseNames) + call self % response(i) % init(dict % getDictPtr( responseNames(i) )) + end do + + ! Set width + self % width = size(responseNames) + + end subroutine init + + !! + !! Return to uninitialised state + !! + elemental subroutine kill(self) + class(trackClerk), intent(inout) :: self + + ! Superclass + call kill_super(self) + + ! Kill and deallocate filter + if(allocated(self % filter)) then + deallocate(self % filter) + end if + + ! Kill and deallocate map + if(allocated(self % map)) then + call self % map % kill() + deallocate(self % map) + end if + + ! Kill and deallocate responses + if(allocated(self % response)) then + deallocate(self % response) + end if + + self % width = 0 + + end subroutine kill + + !! + !! Returns array of codes that represent diffrent reports + !! + !! See tallyClerk_inter for details + !! + function validReports(self) result(validCodes) + class(trackClerk),intent(in) :: self + integer(shortInt),dimension(:),allocatable :: validCodes + + validCodes = [path_CODE] + + end function validReports + + !! + !! Return memory size of the clerk + !! + !! See tallyClerk_inter for details + !! + elemental function getSize(self) result(S) + class(trackClerk), intent(in) :: self + integer(shortInt) :: S + + S = size(self % response) + if(allocated(self % map)) S = S * self % map % bins(0) + + end function getSize + + !! + !! Process incoming track length report + !! + !! See tallyClerk_inter for details + !! 
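  !! Note: as stated in the type documentation above, the state used for the
  !! filter and the map below is p % prePath, the particle state recorded before
  !! the flight, so the whole track is attributed to where the flight started
  !!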
+ subroutine reportPath(self, p, L, xsData,mem) + class(trackClerk), intent(inout) :: self + class(particle), intent(in) :: p + real(defReal), intent(in) :: L + class(nuclearDatabase), intent(inout) :: xsData + type(scoreMemory), intent(inout) :: mem + type(particleState) :: state + type(particle) :: pTmp + integer(shortInt) :: binIdx, i + integer(longInt) :: adrr + real(defReal) :: scoreVal, flx + character(100), parameter :: Here =' reportPath (trackClerk_class.f90)' + + ! Get pre-transition particle state + state = p % prePath + + ! Check if within filter + if(allocated( self % filter)) then + if(self % filter % isFail(state)) return + end if + + ! Find bin index + if(allocated(self % map)) then + binIdx = self % map % map(state) + else + binIdx = 1 + end if + + ! Return if invalid bin index + if (binIdx == 0) return + + ! Calculate bin address + adrr = self % getMemAddress() + self % width * (binIdx -1) - 1 + + ! tranfer information about Prestate material to a temporary particle + pTmp = p + pTmp % coords % matIdx = state % matIdx + + ! Calculate flux sample L = path travelled + flx = L + + ! Append all bins + do i=1,self % width + scoreVal = self % response(i) % get(pTmp, xsData) * p % w * flx + call mem % score(scoreVal, adrr + i) + end do + + end subroutine reportPath + + !! + !! Display convergance progress on the console + !! + !! See tallyClerk_inter for details + !! + subroutine display(self, mem) + class(trackClerk), intent(in) :: self + type(scoreMemory), intent(in) :: mem + + call statusMsg('trackClerk does not support display yet') + + end subroutine display + + !! + !! Write contents of the clerk to output file + !! + !! See tallyClerk_inter for details + !! + subroutine print(self, outFile, mem) + class(trackClerk), intent(in) :: self + class(outputFile), intent(inout) :: outFile + type(scoreMemory), intent(in) :: mem + real(defReal) :: val, std + integer(shortInt) :: i + integer(shortInt),dimension(:),allocatable :: resArrayShape + character(nameLen) :: name + + ! Begin block + call outFile % startBlock(self % getName()) + + ! If track clerk has map print map information + if( allocated(self % map)) then + call self % map % print(outFile) + end if + + ! Write results. + ! Get shape of result array + if(allocated(self % map)) then + resArrayShape = [size(self % response), self % map % binArrayShape()] + else + resArrayShape = [size(self % response)] + end if + + ! Start array + name ='Res' + call outFile % startArray(name, resArrayShape) + + ! 
Print results to the file
    do i=1,product(resArrayShape)
      call mem % getResult(val, std, self % getMemAddress() - 1 + i)
      call outFile % addResult(val,std)

    end do

    call outFile % endArray()
    call outFile % endBlock()

  end subroutine print

end module trackClerk_class

From e02b5d08aeead99255d44a79a6532af0c9aa10c9 Mon Sep 17 00:00:00 2001
From: Mikolaj-A-Kowalski
Date: Sun, 14 Apr 2024 19:24:05 +0200
Subject: [PATCH 21/27] Make eigenPP work in MPI calculations

It is not reproducible at the moment
---
 PhysicsPackages/eigenPhysicsPackage_class.f90 | 81 ++++++++++++-------
 .../fixedSourcePhysicsPackage_class.f90       | 11 ++-
 2 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90
index 991933fec..6f1fe706d 100644
--- a/PhysicsPackages/eigenPhysicsPackage_class.f90
+++ b/PhysicsPackages/eigenPhysicsPackage_class.f90
@@ -4,7 +4,12 @@ module eigenPhysicsPackage_class
   use universalVariables
   use endfConstants
   use genericProcedures,  only : fatalError, numToChar, rotateVector
-  use display_func,       only : printFishLineR
+  use display_func,       only : printFishLineR, statusMsg, printSectionStart, printSectionEnd, &
+                                 printSeparatorLine
+  use mpi_func,           only : isMPIMaster, getWorkshare, getOffset, getMPIRank
+#ifdef MPI
+  use mpi_func,           only : MASTER_RANK, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD
+#endif
   use hashFunctions_func, only : FNV_1
   use dictionary_class,   only : dictionary
   use outputFile_class,   only : outputFile
@@ -89,6 +94,7 @@ module eigenPhysicsPackage_class
     integer(shortInt)  :: N_inactive
     integer(shortInt)  :: N_active
     integer(shortInt)  :: pop
+    integer(shortInt)  :: totalPop
     character(pathLen) :: outputFile
     character(nameLen) :: outputFormat
     integer(shortInt)  :: printSource = 0
@@ -124,17 +130,22 @@ module eigenPhysicsPackage_class
   subroutine run(self)
     class(eigenPhysicsPackage), intent(inout) :: self

-    print *, repeat("<>",50)
-    print *, "/\/\ EIGENVALUE CALCULATION /\/\"
+    call printSeparatorLine()
+    call printSectionStart("EIGENVALUE CALCULATION")
+
+    ! Skip RNG state forward based on the process rank
+    call self % pRNG % stride(getOffset(self % totalPop))

     call self % generateInitialState()
     call self % cycles(self % inactiveTally, self % inactiveAtch, self % N_inactive)
     call self % cycles(self % activeTally, self % activeAtch, self % N_active)
-    call self % collectResults()

-    print *
-    print *, "\/\/ END OF EIGENVALUE CALCULATION \/\/"
-    print *
+    if (isMPIMaster()) call self % collectResults()
+
+    call statusMsg("")
+    call printSectionEnd("END OF EIGENVALUE CALCULATION")
+    call statusMsg("")
+
   end subroutine

   !!
@@ -229,7 +240,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles)
       call self % thisCycle % cleanPop()

       ! Update RNG
-      call self % pRNG % stride(self % pop + 1)
+      call self % pRNG % stride(self % totalPop + 1)

       ! Send end of cycle report
       Nend = self % nextCycle % popSize()
@@ -281,12 +292,12 @@ subroutine cycles(self, tally, tallyAtch, N_cycles)
      !
Display progress call printFishLineR(i) - print * - print *, 'Cycle: ', numToChar(i), ' of ', numToChar(N_cycles) - print *, 'Pop: ', numToChar(Nstart) , ' -> ', numToChar(Nend) - print *, 'Elapsed time: ', trim(secToChar(elapsed_T)) - print *, 'End time: ', trim(secToChar(end_T)) - print *, 'Time to end: ', trim(secToChar(T_toEnd)) + call statusMsg("") + call statusMsg("Cycle: " // numToChar(i) // " of " // numToChar(N_cycles)) + call statusMsg("Pop: " // numToChar(Nstart) // " -> " // numToChar(Nend)) + call statusMsg("Elapsed time: " // trim(secToChar(elapsed_T))) + call statusMsg("End time: " // trim(secToChar(end_T))) + call statusMsg("Time to end: " // trim(secToChar(T_toEnd))) call tally % display() end do @@ -310,9 +321,12 @@ subroutine generateInitialState(self) call self % nextCycle % init(3 * self % pop) ! Generate initial source - print *, "GENERATING INITIAL FISSION SOURCE" + call statusMsg("GENERATING INITIAL FISSION SOURCE") call self % initSource % generate(self % thisCycle, self % pop, self % pRNG) - print *, "DONE!" + call statusMsg("DONE!") + + ! Update RNG after source generation + call self % pRNG % stride(self % totalPop) end subroutine generateInitialState @@ -330,7 +344,7 @@ subroutine collectResults(self) call out % printValue(self % pRNG % getSeed(),name) name = 'pop' - call out % printValue(self % pop,name) + call out % printValue(self % totalPop, name) name = 'Inactive_Cycles' call out % printValue(self % N_inactive,name) @@ -385,7 +399,9 @@ subroutine init(self, dict) call cpu_time(self % CPU_time_start) ! Read calculation settings - call dict % get( self % pop,'pop') + call dict % get( self % totalPop, 'pop') + self % pop = getWorkshare(self % totalPop) + call dict % get( self % N_inactive,'inactive') call dict % get( self % N_active,'active') call dict % get( nucData, 'XSdata') @@ -430,6 +446,12 @@ subroutine init(self, dict) call FNV_1(string,seed_temp) end if + + ! Broadcast seed to all processes +#ifdef MPI + call MPI_Bcast(seed_temp, 1, MPI_INTEGER, MASTER_RANK, MPI_COMM_WORLD) +#endif + seed = seed_temp call self % pRNG % init(seed) @@ -454,11 +476,11 @@ subroutine init(self, dict) self % nucData => ndReg_get(self % particleType) ! Call visualisation - if (dict % isPresent('viz')) then - print *, "Initialising visualiser" + if (dict % isPresent('viz') .and. 
isMPIMaster()) then + call statusMsg("Initialising visualiser") tempDict => dict % getDictPtr('viz') call viz % init(self % geom, tempDict) - print *, "Constructing visualisation" + call statusMsg("Constructing visualisation") call viz % makeViz() call viz % kill() endif @@ -567,14 +589,15 @@ end subroutine kill subroutine printSettings(self) class(eigenPhysicsPackage), intent(in) :: self - print *, repeat("<>",50) - print *, "/\/\ EIGENVALUE CALCULATION WITH POWER ITERATION METHOD /\/\" - print *, "Inactive Cycles: ", numToChar(self % N_inactive) - print *, "Active Cycles: ", numToChar(self % N_active) - print *, "Neutron Population: ", numToChar(self % pop) - print *, "Initial RNG Seed: ", numToChar(self % pRNG % getSeed()) - print * - print *, repeat("<>",50) + call printSeparatorLine() + call printSectionStart("EIGENVALUE CALCULATION WITH POWER ITERATION METHOD") + call statusMsg("Inactive Cycles: " // numToChar(self % N_inactive)) + call statusMsg("Active Cycles: " // numToChar(self % N_active)) + call statusMsg("Neutron Population: " // numToChar(self % pop)) + call statusMsg("Initial RNG Seed: " // numToChar(self % pRNG % getSeed())) + call statusMsg("") + call printSeparatorLine() + end subroutine printSettings end module eigenPhysicsPackage_class diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index 2a3b43050..aaf3ad00f 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -122,6 +122,9 @@ subroutine run(self) call printSeparatorLine() call printSectionStart("FIXED SOURCE CALCULATION") + ! Skip RNG state forward based on the process rank + call self % pRNG % stride(getOffset(self % totalPop)) + call self % cycles(self % tally, self % N_cycles) ! Collect results from other processes @@ -142,7 +145,7 @@ subroutine cycles(self, tally, N_cycles) class(fixedSourcePhysicsPackage), intent(inout) :: self type(tallyAdmin), pointer,intent(inout) :: tally integer(shortInt), intent(in) :: N_cycles - integer(shortInt) :: i, n, nParticles, offset + integer(shortInt) :: i, n, nParticles integer(shortInt), save :: j, bufferExtra type(particle), save :: p, transferP type(particleDungeon), save :: buffer @@ -167,10 +170,6 @@ subroutine cycles(self, tally, N_cycles) !$omp end parallel nParticles = self % pop - offset = getOffset(self % totalPop) - - ! Skip RNG state forward based on the process rank - call self % pRNG % stride(offset) ! Reset and start timer call timerReset(self % timerMain) @@ -380,7 +379,7 @@ subroutine init(self, dict) end if - ! Brodcast seed to all processes + ! Broadcast seed to all processes #ifdef MPI call MPI_Bcast(seed_temp, 1, MPI_INTEGER, MASTER_RANK, MPI_COMM_WORLD) #endif From c03350c45f5554e741b01d012f74dbe5c1cd8bbe Mon Sep 17 00:00:00 2001 From: "V. 
Raffuzzi" Date: Tue, 17 Sep 2024 16:49:06 +0100 Subject: [PATCH 22/27] Adding MPI to work with eigenPP --- DataStructures/Tests/heapQueue_test.f90 | 8 +- DataStructures/heapQueue_class.f90 | 6 +- .../aceDatabase/aceNeutronDatabase_class.f90 | 17 +- .../Tests/particleDungeon_test.f90 | 30 +- ParticleObjects/particleDungeon_class.f90 | 426 ++++++++++++++++-- PhysicsPackages/eigenPhysicsPackage_class.f90 | 77 +++- .../fixedSourcePhysicsPackage_class.f90 | 15 +- RandomNumbers/RNG_class.f90 | 19 +- SharedModules/genericProcedures.f90 | 20 +- SharedModules/mpi_func.f90 | 93 +++- .../TallyClerks/centreOfMassClerk_class.f90 | 4 +- .../TallyClerks/dancoffBellClerk_class.f90 | 17 +- Tallies/TallyClerks/keffAnalogClerk_class.f90 | 31 +- .../TallyClerks/keffImplicitClerk_class.f90 | 2 +- .../TallyClerks/shannonEntropyClerk_class.f90 | 8 +- Tallies/TallyClerks/trackClerk_class.f90 | 22 +- Tallies/scoreMemory_class.f90 | 36 +- Tallies/tallyAdmin_class.f90 | 78 ++-- 18 files changed, 702 insertions(+), 207 deletions(-) diff --git a/DataStructures/Tests/heapQueue_test.f90 b/DataStructures/Tests/heapQueue_test.f90 index c74206f48..47abda7f6 100644 --- a/DataStructures/Tests/heapQueue_test.f90 +++ b/DataStructures/Tests/heapQueue_test.f90 @@ -22,15 +22,15 @@ subroutine testBelowMaximum() call hq % init(8) - @assertEqual(hq % getSize() , 0) + @assertEqual(hq % getSize(), 0) do i = 1, size(seq) call hq % pushReplace(seq(i)) end do ! Check that the maximum value is the maximum value in the sequence - @assertEqual(hq % maxValue() , maxval(seq)) - @assertEqual(hq % getSize() , size(seq)) + @assertEqual(hq % maxValue(), maxval(seq)) + @assertEqual(hq % getSize(), size(seq)) end subroutine testBelowMaximum @@ -57,7 +57,7 @@ subroutine testAboveMaximum() end do ! Check that the threshold is correct - @assertEqual(hq % maxValue() , 2.0_defReal) + @assertEqual(hq % maxValue(), 2.0_defReal) end subroutine testAboveMaximum diff --git a/DataStructures/heapQueue_class.f90 b/DataStructures/heapQueue_class.f90 index 4f657dd33..0a101cf93 100644 --- a/DataStructures/heapQueue_class.f90 +++ b/DataStructures/heapQueue_class.f90 @@ -1,7 +1,9 @@ module heapQueue_class + use numPrecision use genericProcedures, only : swap use errors_mod, only : fatalError + implicit none private @@ -94,7 +96,7 @@ subroutine push(self, val) self % size = self % size + 1 self % heap(self % size) = val - ! Sift the new value up the heap to restore the heap property + ! Shift the new value up the heap to restore the heap property child = self % size parent = child / 2 @@ -122,7 +124,7 @@ subroutine replace(self, val) parent = 1 child = 2 - ! Sift down the new value until heap property is restored be comparing + ! Shift down the new value until heap property is restored be comparing ! 
with the largest child and swapping if necessary do while (child <= self % size) diff --git a/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 b/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 index 87f64c2a8..041c07691 100644 --- a/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 +++ b/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 @@ -3,12 +3,13 @@ module aceNeutronDatabase_class use numPrecision use endfConstants use universalVariables - use display_func, only : statusMsg - use genericProcedures, only : fatalError, numToChar - use dictionary_class, only : dictionary - use RNG_class, only : RNG - use charMap_class, only : charMap - use intMap_class, only : intMap + use errors_mod, only : fatalError + use genericProcedures, only : numToChar, removeDuplicatesSorted, binarySearch + use display_func, only : statusMsg + use dictionary_class, only : dictionary + use RNG_class, only : RNG + use charMap_class, only : charMap + use intMap_class, only : intMap ! Nuclear Data Interfaces use nuclearDatabase_inter, only : nuclearDatabase @@ -1371,7 +1372,7 @@ subroutine initMajorant(self, loud) self % eGridUnion = removeDuplicatesSorted(tmpGrid) if (loud) then - print '(A)', 'CE unionised energy grid has size: '//numToChar(size(self % eGridUnion)) + call statusMsg("CE unionised energy grid has size: "//numToChar(size(self % eGridUnion))) end if ! Allocate unionised majorant @@ -1447,7 +1448,7 @@ subroutine initMajorant(self, loud) end do - if (loud) print '(A)', 'CE unionised majorant cross section calculation completed' + if (loud) call statusMsg("CE unionised majorant cross section calculation completed") end subroutine initMajorant diff --git a/ParticleObjects/Tests/particleDungeon_test.f90 b/ParticleObjects/Tests/particleDungeon_test.f90 index 9243442b5..22f544b9a 100644 --- a/ParticleObjects/Tests/particleDungeon_test.f90 +++ b/ParticleObjects/Tests/particleDungeon_test.f90 @@ -217,7 +217,7 @@ subroutine testNormPopDown() ! Store some particles with non-uniform weight do i = 1,10 p % w = 0.5_defReal + i * 0.1_defReal - p % broodID = 1 ! Avoid triggering error on sort by broodID + p % broodID = i ! Avoid triggering error on sort by broodID call dungeon % detain(p) end do @@ -255,7 +255,7 @@ subroutine testNormPopUp() ! Store some particles with non-uniform weight do i = 1,10 p % w = 0.5_defReal + i * 0.1_defReal - p % broodID = 1 ! Avoid triggering error on sort by broodID + p % broodID = i ! Avoid triggering error on sort by broodID call dungeon % detain(p) end do @@ -280,23 +280,24 @@ end subroutine testNormPopUp subroutine testSortingByBroodID() type(particleDungeon) :: dungeon type(particleState) :: p - integer(shortInt) :: i + integer(shortInt) :: i, N real(defReal), parameter :: TOL = 1.0E-9 ! Initialise - call dungeon % init(10) + N = 10 + call dungeon % init(N) ! Store some particles with brood ID in reverse order - do i = 1,10 - p % broodID = 10 - i + 1 + do i = 1, N + p % broodID = N - i + 1 call dungeon % detain(p) end do ! Sort by brood ID - call dungeon % sortByBroodID(10) + call dungeon % sortByBroodID(N) ! 
Verify order - do i = 1,10 + do i = 1, N p = dungeon % get(i) @assertEqual(i, p % broodID) end do @@ -311,29 +312,30 @@ end subroutine testSortingByBroodID subroutine testSortingByBroodID_withDuplicates() type(particleDungeon) :: dungeon type(particleState) :: p - integer(shortInt) :: i, j + integer(shortInt) :: i, j, N integer(shortInt), parameter :: N_duplicates = 7 real(defReal), parameter :: TOL = 1.0E-9 ! Initialise - call dungeon % init(10 * N_duplicates) + N = 10 + call dungeon % init(N * N_duplicates) ! Store some particles with brood ID in reverse order ! Use the group number to distinguish duplicates and make sure ! that the insertion order is preserved (for particles with the same brood ID) do j = 1, N_duplicates - do i = 1, 10 - p % broodID = 10 - i + 1 + do i = 1, N + p % broodID = N - i + 1 p % G = j call dungeon % detain(p) end do end do ! Sort by brood ID - call dungeon % sortByBroodID(10) + call dungeon % sortByBroodID(N) ! Verify order - do i = 1,10 + do i = 1, N do j = 1, N_duplicates p = dungeon % get(j + (i-1) * N_duplicates) @assertEqual(i, p % broodID) diff --git a/ParticleObjects/particleDungeon_class.f90 b/ParticleObjects/particleDungeon_class.f90 index 89b9db5a6..9b37b245b 100644 --- a/ParticleObjects/particleDungeon_class.f90 +++ b/ParticleObjects/particleDungeon_class.f90 @@ -1,9 +1,18 @@ module particleDungeon_class use numPrecision - use genericProcedures, only : fatalError, numToChar + use genericProcedures, only : fatalError, numToChar, swap use particle_class, only : particle, particleState use RNG_class, only : RNG + use heapQueue_class, only : heapQueue + + use mpi_func, only : isMPIMaster, getMPIWorldSize, getMPIRank, getOffset +#ifdef MPI + use mpi_func, only : mpi_gather, mpi_allgather, mpi_send, mpi_recv, & + mpi_Bcast, MPI_COMM_WORLD, MASTER_RANK, MPI_DEFREAL, & + MPI_SHORTINT, MPI_LONGINT, MPI_PARTICLE_STATE, & + MPI_STATUS_IGNORE, particleStateDummy +#endif implicit none private @@ -60,8 +69,8 @@ module particleDungeon_class !! type, public :: particleDungeon private - real(defReal),public :: k_eff = ONE ! k-eff for fission site generation rate normalisation - integer(shortInt) :: pop = 0 ! Current population size of the dungeon + real(defReal), public :: k_eff = ONE ! k-eff for fission site generation rate normalisation + integer(shortInt) :: pop = 0 ! Current population size of the dungeon ! Storage space type(particleState), dimension(:), allocatable, public :: prisoners @@ -92,6 +101,7 @@ module particleDungeon_class procedure :: setSize procedure :: printToFile procedure :: sortByBroodID + procedure :: samplingWoReplacement ! Private procedures procedure, private :: detain_particle @@ -100,6 +110,9 @@ module particleDungeon_class procedure, private :: detainCritical_particleState procedure, private :: replace_particle procedure, private :: replace_particleState +#ifdef MPI + procedure, private :: loadBalancing +#endif end type particleDungeon @@ -112,7 +125,7 @@ subroutine init(self,maxSize) class(particleDungeon), intent(inout) :: self integer(shortInt), intent(in) :: maxSize - if(allocated(self % prisoners)) deallocate(self % prisoners) + if (allocated(self % prisoners)) deallocate(self % prisoners) allocate(self % prisoners(maxSize)) self % pop = 0 @@ -128,7 +141,7 @@ elemental subroutine kill(self) self % pop = 0 ! 
Deallocate memeory - if(allocated(self % prisoners)) deallocate(self % prisoners) + if (allocated(self % prisoners)) deallocate(self % prisoners) end subroutine kill @@ -293,7 +306,7 @@ subroutine replace_particle(self, p, idx) character(100),parameter :: Here = 'replace_particle (particleDungeon_class.f90)' ! Protect against out-of-bounds access - if( idx <= 0 .or. idx > self % pop ) then + if (idx <= 0 .or. idx > self % pop) then call fatalError(Here,'Out of bounds access with idx: '// numToChar(idx)// & ' with particle population of: '// numToChar(self % pop)) end if @@ -313,7 +326,7 @@ subroutine replace_particleState(self, p, idx) character(100),parameter :: Here = 'replace_particleState (particleDungeon_class.f90)' ! Protect against out-of-bounds access - if( idx <= 0 .or. idx > self % pop ) then + if (idx <= 0 .or. idx > self % pop) then call fatalError(Here,'Out of bounds access with idx: '// numToChar(idx)// & ' with particle population of: '// numToChar(self % pop)) end if @@ -323,7 +336,6 @@ subroutine replace_particleState(self, p, idx) end subroutine replace_particleState - !! !! Copy particle from a location inside the dungeon !! @@ -344,7 +356,7 @@ subroutine copy(self, p, idx) character(100), parameter :: Here = 'copy (particleDungeon_class.f90)' ! Protect against out-of-bounds access - if( idx <= 0 .or. idx > self % pop ) then + if (idx <= 0 .or. idx > self % pop) then call fatalError(Here,'Out of bounds access with idx: '// numToChar(idx)// & ' with particle population of: '// numToChar(self % pop)) end if @@ -402,25 +414,376 @@ subroutine normWeight(self,totWgt) end subroutine normWeight + !! + !! Normalise total number of particles in the dungeon to match the provided number + !! + !! Does not take weight of a particle into account! + !! + subroutine normSize(self, totPop, rand) + class(particleDungeon), intent(inout) :: self + integer(shortInt), intent(in) :: totPop + class(RNG), intent(inout) :: rand + type(RNG) :: rankRand, masterRand + type(heapQueue) :: heap + real(defReal) :: threshold, rn + integer(longInt) :: seedTemp + integer(shortInt) :: maxbroodID, totSites, excess, heapSize, & + n_duplicates, i, j, n_copies, count, nRanks, & + rank + integer(longInt), dimension(:), allocatable :: seeds + integer(shortInt), dimension(:), allocatable :: keepers, popSizes +#ifdef MPI + integer(shortInt) :: error +#endif + character(100), parameter :: Here = 'normSize (particleDungeon_class.f90)' + + ! Determine the maximum brood ID and sort the dungeon for OMP reproducibility + maxBroodID = maxval(self % prisoners(1:self % pop) % broodID) + call self % sortByBroodID(maxbroodID) + + ! Get MPI world size and allocate rng seed vector, needed by all processes + nRanks = getMPIWorldSize() + allocate(seeds(nRanks), popSizes(nRanks)) + + ! Initialise popSizes with the correct value for when only one process is used + popSizes = self % pop + seeds = 0 + threshold = ONE + +#ifdef MPI + ! Get the population sizes of all ranks into the array popSizes in master branch + call mpi_gather(self % pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD, error) +#endif + + ! In the master process, calculate sampling threshold for the whole population + ! and send it to all processes + if (isMPIMaster()) then + + ! Calculate number of sites generated over all processes and difference to target population + totSites = sum(popSizes) + excess = totSites - totPop + + ! Assign heapQueue size according to case, accounting for cases where the whole + ! 
population might have to be replicated due to massive undersampling + if (excess < 0) then + n_duplicates = modulo(-excess, totSites) + heapSize = n_duplicates + else + heapSize = excess + end if + + if (heapSize /= 0) then + + ! Copy rng + masterRand = rand + + ! Initialise heapQueue and push upper bound larger than 1.0 + call heap % init(heapSize) + call heap % pushReplace(TWO) + + ! Loop to generate totSites random numbers to fill the heapQueue + do i = 1, nRanks + + ! Save rng seed: this will be the starting seed in the i-th rank + seeds(i) = masterRand % currentState() + + ! Populate heapQueue + do j = 1, popSizes(i) + rn = masterRand % get() + if (rn < heap % maxValue()) call heap % pushReplace(rn) + end do + + end do + + ! Save sampling threshold + threshold = heap % maxValue() + + end if + + end if + + ! Broadcast threshold, excess and random number seeds to all processes +#ifdef MPI + call MPI_Bcast(threshold, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(excess, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(seeds, nRanks, MPI_LONGINT, MASTER_RANK, MPI_COMM_WORLD) +#endif + + ! Get local process rank and initialise local rng with the correct seed + rank = getMPIRank() + seedTemp = seeds(rank + 1) + call rankRand % init(seedTemp) + + ! Perform the actual sampling + if (excess > 0) then + + allocate(keepers(self % pop)) + keepers = 0 + count = 0 + + ! Loop over source sites + do i = 1, self % pop + + ! Save the indexes of the sites to keep and increment counter + if (rankRand % get() > threshold) then + count = count + 1 + keepers(count) = i + end if + + end do + + ! Loop through accepted sites to save them + do i = 1, count + self % prisoners(i) = self % prisoners(keepers(i)) + end do + + ! Update population number + self % pop = count + + elseif (excess < 0) then + + ! Check if copies have to be made and the number of particles to duplicate with the sampling + totSites = excess + totPop + n_copies = -excess / totSites + n_duplicates = modulo(-excess, totSites) + + ! Copy all the particles the number of times needed + do i = 1, n_copies + self % prisoners(self % pop * i + 1 : self % pop * (i + 1)) = self % prisoners(1:self % pop) + end do + + ! Loop over population to duplicate from + count = self % pop * (n_copies + 1) + + if (n_duplicates /= 0) then + + do i = 1, self % pop + ! Save duplicated particles at the end of the dungeon + if (rankRand % get() <= threshold) then + count = count + 1 + self % prisoners(count) = self % prisoners(i) + end if + end do + + end if + + ! Update population number + self % pop = count + + ! Determine the maximum brood ID and sort the dungeon again for MPI reproducibility + maxBroodID = maxval(self % prisoners(1:self % pop) % broodID) + call self % sortByBroodID(maxbroodID) + + end if + + ! Get the new population in the case of one thread + popSizes = self % pop + +#ifdef MPI + ! Get the updated population numbers from all processes + call mpi_allgather(self % pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, MPI_COMM_WORLD, error) +#endif + + ! Check that normalisation worked + if (sum(popSizes) /= totPop) call fatalError(Here, 'Normalisation failed!') + +#ifdef MPI + ! Perform load balancing by redistributing particles across processes + if (nRanks > 1) call self % loadBalancing(totPop, nRanks, rank, popSizes) +#endif + + end subroutine normSize + + !! + !! Perform nearest neighbor load balancing + !! 
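  !! Args:
  !!   totPop [in]   -> target total population summed over all processes
  !!   nRanks [in]   -> total number of MPI processes
  !!   rank [in]     -> rank of the calling process
  !!   popSizes [in] -> current dungeon population on every rank
  !!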
+#ifdef MPI + subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) + class(particleDungeon), intent(inout) :: self + integer(shortInt), intent(in) :: totPop + integer(shortInt), intent(in) :: nRanks + integer(shortInt), intent(in) :: rank + integer(shortInt), dimension(:), intent(in) :: popSizes + integer(shortInt), dimension(:), allocatable :: rankOffsets + integer(shortInt), dimension(2) :: offset, targetOffset + integer(shortInt) :: mpiOffset, excess, error + type(particleState), dimension(:), allocatable :: stateBuffer + type(particleStateDummy), dimension(:), allocatable :: dummyBuffer + + ! Get expected particle population in each process via the offset + mpiOffset = getOffset(totPop) + + ! Communicates the offset from all processes to all processes + allocate(rankOffsets(nRanks)) + call mpi_allgather(mpiOffset, 1, MPI_SHORTINT, rankOffsets, 1, MPI_SHORTINT, MPI_COMM_WORLD, error) + + ! Calculate actual and target cumulative number of sites in the processes before + offset(1) = sum(popSizes(1 : rank)) + offset(2) = sum(popSizes(1 : rank + 1)) + targetOffset(1) = rankOffsets(rank + 1) + if (rank + 1 == nRanks) then + targetOffset(2) = totPop + else + targetOffset(2) = rankOffsets(rank + 2) + end if + + ! If needed, send/receive particle states from/to the end of the dungeon + excess = offset(2) - targetOffset(2) + + if (excess > 0) then + + ! Send particles from the end of the dungeon to the rank above + stateBuffer = self % prisoners(self % pop - excess + 1 : self % pop) + call initStateDummies(stateBuffer, dummyBuffer) + call mpi_send(dummyBuffer, excess, MPI_PARTICLE_STATE, rank + 1, rank, MPI_COMM_WORLD, error) + self % pop = self % pop - excess + + elseif (excess < 0) then + + ! Receive particles from the rank above and store them at the end of the dungeon + excess = -excess + allocate(dummyBuffer(excess)) + call mpi_recv(dummyBuffer, excess, MPI_PARTICLE_STATE, rank + 1, rank + 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, error) + call createStatesFromDummies(dummyBuffer, stateBuffer) + self % prisoners(self % pop + 1 : self % pop + excess) = stateBuffer + self % pop = self % pop + excess + + end if + + if (excess /= 0) deallocate(stateBuffer, dummyBuffer) + + ! If needed, send/receive particle states from/to the beginning of the dungeon + excess = offset(1) - targetOffset(1) + + if (excess < 0) then + + ! Send particles from the beginning of the dungeon to the rank below + excess = -excess + stateBuffer = self % prisoners(1 : excess) + call initStateDummies(stateBuffer, dummyBuffer) + call mpi_send(dummyBuffer, excess, MPI_PARTICLE_STATE, rank - 1, rank, MPI_COMM_WORLD, error) + + ! Move the remaining particles to the beginning of the dungeon + self % prisoners(1 : self % pop - excess) = self % prisoners(excess + 1 : self % pop) + self % pop = self % pop - excess + + elseif (excess > 0) then + + ! Receive particles from the rank below and store them at the beginning of the dungeon + allocate(dummyBuffer(excess)) + call mpi_recv(dummyBuffer, excess, MPI_PARTICLE_STATE, rank - 1, rank - 1, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, error) + call createStatesFromDummies(dummyBuffer, stateBuffer) + self % prisoners(excess + 1 : self % pop + excess) = self % prisoners(1 : self % pop) + self % prisoners(1 : excess) = stateBuffer + self % pop = self % pop + excess + + end if + + end subroutine loadBalancing +#endif + + !! + !! Helper procedure for MPI loadBalancing + !! + !! Given an input array of particleState, it allocates a vector of particleStateDummy + !! 
of the same length and copies all the particleState variables. It returns + !! the array of particleStateDummy + !! +#ifdef MPI + subroutine initStateDummies(states, dummies) + type(particleState), dimension(:), intent(in) :: states + type(particleStateDummy), dimension(:), allocatable, intent(out) :: dummies + integer(shortInt) :: i, N + + ! Allocate particleStateDummy array + N = size(states) + allocate(dummies(N)) + + ! Copy particleState attributes + do i = 1, N + dummies(i) % r = states(i) % r + dummies(i) % dir = states(i) % dir + end do + + dummies % wgt = states % wgt + dummies % E = states % E + dummies % G = states % G + dummies % isMG = states % isMG + dummies % type = states % type + dummies % time = states % time + dummies % matIdx = states % matIdx + dummies % uniqueID = states % uniqueID + dummies % cellIdx = states % cellIdx + dummies % collisionN = states % collisionN + dummies % broodID = states % broodID + + end subroutine initStateDummies +#endif + + !! + !! Helper procedure for MPI loadBalancing + !! + !! Given an input array of particleStateDummy, it allocates a vector of particleState + !! of the same length and copies all the particleStateDummy variables. It returns + !! the array of particleState + !! +#ifdef MPI + subroutine createStatesFromDummies(dummies, states) + type(particleStateDummy), dimension(:), intent(in) :: dummies + type(particleState), dimension(:), allocatable, intent(out) :: states + integer(shortInt) :: i, N + + ! Allocate particleState array + N = size(dummies) + allocate(states(N)) + + ! Copy particleStateDummy attributes + do i = 1, N + states(i) % r = dummies(i) % r + states(i) % dir = dummies(i) % dir + end do + + states % wgt = dummies % wgt + states % E = dummies % E + states % G = dummies % G + states % isMG = dummies % isMG + states % type = dummies % type + states % time = dummies % time + states % matIdx = dummies % matIdx + states % uniqueID = dummies % uniqueID + states % cellIdx = dummies % cellIdx + states % collisionN = dummies % collisionN + states % broodID = dummies % broodID + + end subroutine createStatesFromDummies +#endif + + !! !! Normalise total number of particles in the dungeon to match the provided number. - !! Randomly duplicate or remove particles to match the number. + !! Randomly duplicate or remove particles to match the number, performing + !! sampling without replacement (Knuth S algorithm). + !! !! Does not take weight of a particle into account! !! - subroutine normSize(self, N, rand) + !! NOTE: this procedure is not currently used + !! + subroutine samplingWoReplacement(self, N, rand) class(particleDungeon), intent(inout) :: self integer(shortInt), intent(in) :: N class(RNG), intent(inout) :: rand integer(shortInt) :: excessP, n_copies, n_duplicates integer(shortInt) :: i, idx, maxBroodID integer(shortInt), dimension(:), allocatable :: duplicates - character(100), parameter :: Here =' normSize (particleDungeon_class.f90)' + character(100), parameter :: Here = 'samplingWoReplacement (particleDungeon_class.f90)' ! Protect against invalid N - if( N > size(self % prisoners)) then + if (N > size(self % prisoners)) then call fatalError(Here,'Requested size: '//numToChar(N) //& 'is greater then max size: '//numToChar(size(self % prisoners))) - else if ( N <= 0 ) then + else if (N <= 0) then call fatalError(Here,'Requested size: '//numToChar(N) //' is not +ve') end if @@ -431,7 +794,7 @@ subroutine normSize(self, N, rand) ! Calculate excess particles to be removed excessP = self % pop - N - if (excessP > 0 ) then ! 
Reduce population with reservoir sampling + if (excessP > 0) then ! Reduce population with reservoir sampling do i = N + 1, self % pop ! Select new index. Copy data if it is in the safe zone (<= N). idx = int(i * rand % get()) + 1 @@ -447,7 +810,7 @@ subroutine normSize(self, N, rand) n_copies = excessP / self % pop n_duplicates = modulo(excessP, self % pop) - ! Copy all particle maximum possible number of times + ! Copy all particles the maximum possible number of times do i = 1, n_copies self % prisoners(self % pop * i + 1 : self % pop * (i + 1)) = self % prisoners(1:self % pop) end do @@ -470,7 +833,7 @@ subroutine normSize(self, N, rand) end if - end subroutine normSize + end subroutine samplingWoReplacement !! !! Reorder the dungeon so the brood ID is in the ascending order @@ -482,7 +845,7 @@ subroutine sortByBroodID(self, k) class(particleDungeon), intent(inout) :: self integer(shortInt), intent(in) :: k integer(shortInt), dimension(k) :: count - integer(shortInt) :: i, id, loc, c + integer(shortInt) :: i, id, loc, c, j integer(shortInt), dimension(:), allocatable :: perm type(particleState) :: tmp character(100), parameter :: Here = 'sortBybroodID (particleDungeon_class.f90)' @@ -514,19 +877,22 @@ subroutine sortByBroodID(self, k) ! Permute particles do i = 1, self % pop - loc = perm(i) + j = i - ! If the element was already swapped follow it to its location - do while (loc < i) - loc = perm(loc) - end do + do while(i /= perm(i)) + loc = perm(i) - ! Swap elements - if (loc /= i) then - tmp = self % prisoners(i) - self % prisoners(i) = self % prisoners(loc) - self % prisoners(loc) = tmp - end if + ! Swap elements + if (loc /= j) then + tmp = self % prisoners(j) + self % prisoners(j) = self % prisoners(loc) + self % prisoners(loc) = tmp + end if + call swap(perm(i), perm(loc)) + + j = loc + + end do end do @@ -561,7 +927,7 @@ function popWeight(self) result(wgt) class(particleDungeon), intent(in) :: self real(defReal) :: wgt - wgt = sum( self % prisoners(1:self % pop) % wgt ) + wgt = sum(self % prisoners(1:self % pop) % wgt) end function popWeight diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index 6f1fe706d..ba7ef62f1 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -3,16 +3,18 @@ module eigenPhysicsPackage_class use numPrecision use universalVariables use endfConstants - use genericProcedures, only : fatalError, numToChar, rotateVector - use display_func, only : printFishLineR, statusMsg, printSectionStart, printSectionEnd, & - printSeparatorLine + use genericProcedures, only : numToChar, rotateVector + use display_func, only : printFishLineR, statusMsg, printSectionStart, & + printSectionEnd, printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, getOffset, getMPIRank #ifdef MPI - use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD + use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD, & + MPI_DEFREAL, mpi_reduce, MPI_SUM #endif use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary use outputFile_class, only : outputFile + use errors_mod, only : fatalError ! 
Timers use timer_mod, only : registerTimer, timerStart, timerStop, & @@ -137,9 +139,14 @@ subroutine run(self) call self % pRNG % stride(getOffset(self % totalPop)) call self % generateInitialState() + call self % cycles(self % inactiveTally, self % inactiveAtch, self % N_inactive) call self % cycles(self % activeTally, self % activeAtch, self % N_active) + ! Collect results from other processes + call self % inactiveTally % collectDistributed() + call self % activeTally % collectDistributed() + if (isMpiMaster()) call self % collectResults() call statusMsg("") @@ -157,7 +164,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) type(tallyAdmin), pointer,intent(inout) :: tallyAtch integer(shortInt), intent(in) :: N_cycles type(particleDungeon), save :: buffer - integer(shortInt) :: i, n, Nstart, Nend, nParticles + integer(shortInt) :: i, n, nStart, nEnd, nParticles class(tallyResult),allocatable :: res type(collisionOperator), save :: collOp class(transportOperator),allocatable,save :: transOp @@ -165,6 +172,9 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) type(particle), save :: neutron real(defReal) :: k_old, k_new real(defReal) :: elapsed_T, end_T, T_toEnd +#ifdef MPI + integer(shortInt) :: error, nTemp +#endif character(100),parameter :: Here ='cycles (eigenPhysicsPackage_class.f90)' !$omp threadprivate(neutron, buffer, collOp, transOp, pRNG) @@ -187,10 +197,10 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) call timerReset(self % timerMain) call timerStart(self % timerMain) - do i=1,N_cycles + do i = 1, N_cycles ! Send start of cycle report - Nstart = self % thisCycle % popSize() + nStart = self % thisCycle % popSize() call tally % reportCycleStart(self % thisCycle) nParticles = self % thisCycle % popSize() @@ -219,10 +229,10 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) ! Transport particle until its death history: do call transOp % transport(neutron, tally, buffer, self % nextCycle) - if(neutron % isDead) exit history + if (neutron % isDead) exit history call collOp % collide(neutron, tally, buffer, self % nextCycle) - if(neutron % isDead) exit history + if (neutron % isDead) exit history end do history ! Clear out buffer @@ -243,7 +253,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) call self % pRNG % stride(self % totalPop + 1) ! Send end of cycle report - Nend = self % nextCycle % popSize() + nEnd = self % nextCycle % popSize() call tally % reportCycleEnd(self % nextCycle) if (self % UFS) then @@ -251,9 +261,12 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) end if ! Normalise population - call self % nextCycle % normSize(self % pop, self % pRNG) + call self % nextCycle % normSize(self % totalPop, self % pRNG) - if(self % printSource == 1) then + ! Update RNG after it was used to normalise particle population + call self % pRNG % stride(1) + + if (self % printSource == 1) then call self % nextCycle % printToFile(trim(self % outputFile)//'_source'//numToChar(i)) end if @@ -274,6 +287,11 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) end select +#ifdef MPI + ! Broadcast k_eff obtained in the master to all processes + call MPI_Bcast(k_new, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD) +#endif + ! Load new k-eff estimate into next cycle dungeon k_old = self % nextCycle % k_eff self % nextCycle % k_eff = k_new @@ -289,22 +307,29 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) end_T = real(N_cycles,defReal) * elapsed_T / i T_toEnd = max(ZERO, end_T - elapsed_T) +#ifdef MPI + ! 
Print the population numbers referred to all processes to screen + call mpi_reduce(nStart, nTemp, 1, MPI_INTEGER, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + nStart = nTemp + call mpi_reduce(nEnd, nTemp, 1, MPI_INTEGER, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + nEnd = nTemp +#endif ! Display progress call printFishLineR(i) call statusMsg("") call statusMsg("Cycle: " // numToChar(i) // " of " // numToChar(N_cycles)) - call statusMsg("Pop: " // numToChar(Nstart) // " -> " // numToChar(Nend)) + call statusMsg("Pop: " // numToChar(nStart) // " -> " // numToChar(nEnd)) call statusMsg("Elapsed time: " // trim(secToChar(elapsed_T))) call statusMsg("End time: " // trim(secToChar(end_T))) call statusMsg("Time to end: " // trim(secToChar(T_toEnd))) call tally % display() + end do ! Load elapsed time self % time_transport = self % time_transport + elapsed_T - end subroutine cycles !! @@ -338,26 +363,26 @@ subroutine collectResults(self) type(outputFile) :: out character(nameLen) :: name - call out % init(self % outputFormat, filename=self % outputFile) + call out % init(self % outputFormat, filename = self % outputFile) name = 'seed' - call out % printValue(self % pRNG % getSeed(),name) + call out % printValue(self % pRNG % getSeed(), name) name = 'pop' call out % printValue(self % totalPop, name) name = 'Inactive_Cycles' - call out % printValue(self % N_inactive,name) + call out % printValue(self % N_inactive, name) name = 'Active_Cycles' - call out % printValue(self % N_active,name) + call out % printValue(self % N_active, name) call cpu_time(self % CPU_time_end) name = 'Total_CPU_Time' - call out % printValue((self % CPU_time_end - self % CPU_time_start),name) + call out % printValue((self % CPU_time_end - self % CPU_time_start), name) name = 'Total_Transport_Time' - call out % printValue(self % time_transport,name) + call out % printValue(self % time_transport, name) ! Print Inactive tally name = 'inactive' @@ -376,7 +401,6 @@ subroutine collectResults(self) end subroutine collectResults - !! !! Initialise from individual components and dictionaries for inactive and active tally !! @@ -399,7 +423,7 @@ subroutine init(self, dict) call cpu_time(self % CPU_time_start) ! Read calculation settings - call dict % get( self % totalPop, 'pop') + call dict % get(self % totalPop, 'pop') self % pop = getWorkshare(self % totalPop) call dict % get( self % N_inactive,'inactive') @@ -436,7 +460,7 @@ subroutine init(self, dict) ! *** It is a bit silly but dictionary cannot store longInt for now ! so seeds are limited to 32 bits (can be -ve) - if( dict % isPresent('seed')) then + if (dict % isPresent('seed')) then call dict % get(seed_temp,'seed') else @@ -537,7 +561,7 @@ subroutine init(self, dict) ! Initialise active and inactive tally attachments ! Inactive tally attachment - call locDict1 % init(2) + call locDict1 % init(3) call locDict2 % init(2) call locDict2 % store('type','keffAnalogClerk') @@ -551,12 +575,14 @@ subroutine init(self, dict) call locDict1 % kill() ! Active tally attachment - call locDict1 % init(2) + ! 
Note: mpiSync ensures that k_eff is synchronised between all processes each cycle + call locDict1 % init(3) call locDict2 % init(2) call locDict2 % store('type','keffImplicitClerk') call locDict1 % store('keff', locDict2) call locDict1 % store('display',['keff']) + call locDict1 % store('mpiSync', 1) allocate(self % activeAtch) call self % activeAtch % init(locDict1) @@ -600,4 +626,5 @@ subroutine printSettings(self) end subroutine printSettings + end module eigenPhysicsPackage_class diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index aaf3ad00f..3cca40ce0 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -3,7 +3,7 @@ module fixedSourcePhysicsPackage_class use numPrecision use universalVariables use endfConstants - use genericProcedures, only : fatalError, numToChar, rotateVector + use genericProcedures, only : numToChar, rotateVector use display_func, only : printFishLineR, statusMsg, printSectionStart, printSectionEnd, & printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, getOffset, getMPIRank @@ -13,6 +13,7 @@ module fixedSourcePhysicsPackage_class use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary use outputFile_class, only : outputFile + use errors_mod, only : fatalError ! Timers use timer_mod, only : registerTimer, timerStart, timerStop, & @@ -175,7 +176,7 @@ subroutine cycles(self, tally, N_cycles) call timerReset(self % timerMain) call timerStart(self % timerMain) - do i=1,N_cycles + do i = 1, N_cycles ! Send start of cycle report call self % fixedSource % generate(self % thisCycle, nParticles, self % pRNG) @@ -183,7 +184,7 @@ subroutine cycles(self, tally, N_cycles) ! Update RNG after source generation call self % pRNG % stride(self % totalPop) - if(self % printSource == 1) then + if (self % printSource == 1) then call self % thisCycle % printToFile(trim(self % outputFile)//'_source'//numToChar(i)) end if @@ -211,10 +212,10 @@ subroutine cycles(self, tally, N_cycles) ! Transport particle until its death history: do call transOp % transport(p, tally, buffer, buffer) - if(p % isDead) exit history + if (p % isDead) exit history call collOp % collide(p, tally, buffer, buffer) - if(p % isDead) exit history + if (p % isDead) exit history end do history ! If buffer is quite full, shift some particles to the commonBuffer @@ -332,7 +333,7 @@ subroutine init(self, dict) call cpu_time(self % CPU_time_start) ! Read calculation settings - call dict % get( self % totalPop,'pop') + call dict % get(self % totalPop,'pop') self % pop = getWorkshare(self % totalPop) call dict % get( self % N_cycles,'cycles') @@ -368,7 +369,7 @@ subroutine init(self, dict) ! *** It is a bit silly but dictionary cannot store longInt for now ! so seeds are limited to 32 bits (can be -ve) - if( dict % isPresent('seed')) then + if (dict % isPresent('seed')) then call dict % get(seed_temp,'seed') else diff --git a/RandomNumbers/RNG_class.f90 b/RandomNumbers/RNG_class.f90 index 2baf68aac..82a9ee4d0 100644 --- a/RandomNumbers/RNG_class.f90 +++ b/RandomNumbers/RNG_class.f90 @@ -7,7 +7,7 @@ module rng_class !! !! Linear congruential 63 bit random number generator !! - !! Follows recurrance formula: xi(i+1) = (g * xi(i) + c) mod M + !! Follows recurrence formula: xi(i+1) = (g * xi(i) + c) mod M !! !! Global Parameters (values based on OpenMC): !! g: multiplier = 2806196910506780709 @@ -201,7 +201,7 @@ end function getInt !! 
Justification for why the algorithim for evaluation of Ck works is made by me. [MAK] !! !! Assume that we are given a LCG in a state S0 and we are interested to find its state S_k after - !! k steps. Then starting with recurrence relation we can expand the recurrance k times: + !! k steps. Then starting with recurrence relation we can expand the recurrence k times: !! !! S_k = g * S_(k-1) + c (mod M) !! S_k = g * ([ g * S_(k-2) + c (mod M)]) + c (mod M) = g**2 * S_(k-2) + c * ( g + 1) (mod M) @@ -220,7 +220,7 @@ end function getInt !! Where k_i denotes the i-th bit of k binary representation and can be 0 or 1. Now the evaluation !! of Gk is trivial by noting that (g**n)**k = 1 if k=0 or (g**n)**k = g**n if k = 1. !! - !! The evaluation of Ck is slightly more compilcated and it is based on two recurrance relations + !! The evaluation of Ck is slightly more compilcated and it is based on two recurrence relations !! for the sum of geometric series. Denote L(k) to be a geometric series such that !! L(k) = 1 + g + g**2 + g**3 + ... + g**(k-1). And define L(0) = 0. !! Then following relations hold: @@ -233,8 +233,8 @@ end function getInt !! !! Relation 2) can be proven by expanding the sum and using (x**2-1) = (x+1)(x-1) !! - !! Using the binary expansion of k. Denote highest bit index of k to be m. - !! Than using the recurrance relation 1) it can be shown that: + !! Using the binary expansion of k. Denote highest bit index of k to be m. + !! Than using the recurrence relation 1) it can be shown that: !! !! C(k) = C(k_m * 2**m + R_m)= C(R_m) * g**(2**m * k_m) + c * L(2**m * k_m) !! @@ -270,9 +270,9 @@ subroutine skip(self, k_in) if (k_in >= 0) then k = k_in else - ! Line below Must be like that + ! Line below must be like that ! It is fully standard conforming - ! k = k + M which is more elegant can brake under compiler optimisation + ! k = k + M which is more elegant can break under compiler optimisation ! For example gfortran 8.3 with -O3 ! NOTE: This assumes that M is 64bit and huge gives 2^63-1 ! k = huge(M) - abs(k_in) + 1 @@ -284,17 +284,18 @@ subroutine skip(self, k_in) i = 1 do while( k > 0) - if(iand(k, 1_int64) == 1) then ! Right-most bit is 1 + if (iand(k, 1_int64) == 1) then ! Right-most bit is 1 Gk = iand(Gk * gSq_to_i, bitMask) ! Add to Gk Ck = iand(Ck * gSq_to_i, bitMask) ! Add to Ck Ck = iand(Ck + L, bitMask) - end if + L = iand(L * (gSq_to_i+1), bitMask) ! Calculate next value of L !gSq_to_i = iand(gSq_to_i*gSq_to_i, bitMask) ! Calculate next power of g**2 gSq_to_i = pow_of_gsq(i) ! Use tabulated values to avoid compiler bugs (Temporary) k = ishft(k, -1) ! Right shift k by 1 i = i + 1 + end do ! Jump forward diff --git a/SharedModules/genericProcedures.f90 b/SharedModules/genericProcedures.f90 index d319e848e..b1de6b6d9 100644 --- a/SharedModules/genericProcedures.f90 +++ b/SharedModules/genericProcedures.f90 @@ -1230,16 +1230,16 @@ recursive pure subroutine quickSort_shortInt(array) integer(shortInt) :: pivot integer(shortInt) :: i, maxSmall - if (size(array) > 1 ) then + if (size(array) > 1) then ! Set a pivot to the rightmost element pivot = size(array) ! Move all elements <= pivot to the LHS of the pivot ! 
Find position of the pivot in the array at the end (maxSmall)
     maxSmall = 0
-    do i=1,size(array)
+    do i = 1,size(array)

-      if( array(i) <= array(pivot)) then
+      if (array(i) <= array(pivot)) then
         maxSmall = maxSmall + 1
         call swap(array(i),array(maxSmall))
       end if
@@ -1259,16 +1259,16 @@ recursive pure subroutine quickSort_defReal(array)
     real(defReal), dimension(:), intent(inout) :: array
     integer(shortInt) :: i, maxSmall, pivot

-    if (size(array) > 1 ) then
+    if (size(array) > 1) then
       ! Set a pivot to the rightmost element
       pivot = size(array)

       ! Move all elements <= pivot to the LHS of the pivot
       ! Find position of the pivot in the array at the end (maxSmall)
       maxSmall = 0
-      do i=1,size(array)
+      do i = 1,size(array)

-        if( array(i) <= array(pivot)) then
+        if (array(i) <= array(pivot)) then
           maxSmall = maxSmall + 1
           call swap(array(i),array(maxSmall))
         end if
@@ -1292,20 +1292,20 @@ recursive subroutine quickSort_defReal_defReal(array1, array2)
     integer(shortInt) :: i, maxSmall, pivot
     character(100),parameter :: Here = 'quickSort_defReal_defReal (genericProcdures.f90)'

-    if(size(array1) /= size(array2)) then
+    if (size(array1) /= size(array2)) then
       call fatalError(Here,'Arrays have diffrent size!')
     end if

-    if (size(array1) > 1 ) then
+    if (size(array1) > 1) then
       ! Set a pivot to the rightmost element
       pivot = size(array1)

       ! Move all elements <= pivot to the LHS of the pivot
       ! Find position of the pivot in the array1 at the end (maxSmall)
       maxSmall = 0
-      do i=1,size(array1)
+      do i = 1,size(array1)

-        if( array1(i) <= array1(pivot)) then
+        if (array1(i) <= array1(pivot)) then
           maxSmall = maxSmall + 1
           call swap(array1(i), array2(i), array1(maxSmall), array2(maxSmall))
         end if
diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90
index 53fe05998..7de45a336 100644
--- a/SharedModules/mpi_func.f90
+++ b/SharedModules/mpi_func.f90
@@ -3,18 +3,41 @@ module mpi_func
 #ifdef MPI
   use mpi_f08
 #endif
-  !use genericProcedures, only : numToChar
   use errors_mod, only : fatalError
+
   implicit none

-  integer(shortInt), private :: worldSize
-  integer(shortInt), private :: rank
-  integer(shortInt), parameter :: MASTER_RANK = 0
+  integer(shortInt), private   :: worldSize = 1
+  integer(shortInt), private   :: rank = 0
+  integer(shortInt), parameter :: MASTER_RANK = 0
+
+  !! Public type that exactly replicates particleState
+  !!
+  !! It is necessary for load balancing in the dungeon: particles have to be
+  !! transferred between processes, and MPI does not allow transferring types
+  !! with type-bound procedures
+  type, public :: particleStateDummy
+    real(defReal)              :: wgt
+    real(defReal),dimension(3) :: r
+    real(defReal),dimension(3) :: dir
+    real(defReal)              :: E
+    integer(shortInt)          :: G
+    logical(defBool)           :: isMG
+    integer(shortInt)          :: type
+    real(defReal)              :: time
+    integer(shortInt)          :: matIdx
+    integer(shortInt)          :: cellIdx
+    integer(shortInt)          :: uniqueID
+    integer(shortInt)          :: collisionN
+    integer(shortInt)          :: broodID
+  end type particleStateDummy

   !! Common MPI types
 #ifdef MPI
-  type(MPI_Datatype) :: MPI_DEFREAL
-  type(MPI_Datatype) :: MPI_SHORTINT
+  type(MPI_Datatype) :: MPI_DEFREAL
+  type(MPI_Datatype) :: MPI_SHORTINT
+  type(MPI_Datatype) :: MPI_LONGINT
+  type(MPI_Datatype) :: MPI_PARTICLE_STATE
 #endif

 contains

@@ -26,28 +49,64 @@ module mpi_func
   !!
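The mpiInit routine below registers MPI_PARTICLE_STATE using the standard mpi_type_create_struct recipe: take the byte address of every member of a sample variable with mpi_get_address, rebase the displacements on the first member, then commit the datatype. The same recipe reduced to a hypothetical two-field type, as a self-contained sketch (illustrative only, not part of the patch):

  program struct_type_demo
    use mpi_f08
    implicit none

    type :: pair
      double precision :: w
      integer          :: id
    end type pair

    type(pair)                       :: sample
    type(MPI_Datatype)               :: MPI_PAIR, types(2)
    integer(kind = MPI_ADDRESS_KIND) :: disp(2)
    integer                          :: lens(2), ierr

    call mpi_init(ierr)

    lens  = [1, 1]
    types = [MPI_DOUBLE_PRECISION, MPI_INTEGER]

    ! Byte offset of each member, rebased on the first one
    call mpi_get_address(sample % w,  disp(1), ierr)
    call mpi_get_address(sample % id, disp(2), ierr)
    disp = disp - disp(1)

    call mpi_type_create_struct(2, lens, disp, types, MPI_PAIR, ierr)
    call mpi_type_commit(MPI_PAIR, ierr)

    ! MPI_PAIR can now carry type(pair) arrays through mpi_send/mpi_recv,
    ! just as MPI_PARTICLE_STATE carries particleStateDummy arrays

    call mpi_type_free(MPI_PAIR, ierr)
    call mpi_finalize(ierr)

  end program struct_type_demo

Rebasing on the first member rather than on MPI_BOTTOM keeps the datatype position-independent, which is why mpiInit subtracts displacements(1) below.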
subroutine mpiInit() #ifdef MPI - integer(shortInt) :: ierr + integer(shortInt) :: ierr, stateSize + type(particleStateDummy) :: state + integer(kind = MPI_ADDRESS_KIND), dimension(:), allocatable :: displacements + integer(shortInt), dimension(:), allocatable :: blockLengths + type(MPI_Datatype), dimension(:), allocatable :: types call mpi_init(ierr) + ! Read number of processes and rank of each process call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) - call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) - ! Define MPI Type for DEFREAL + ! Define MPI type for DEFREAL call mpi_type_create_f90_real(precision(1.0_defReal), range(1.0_defReal), & MPI_DEFREAL, ierr) - call mpi_type_commit(MPI_DEFREAL, ierr) - ! Define MPI Type for SHORTINT + ! Define MPI type for SHORTINT call mpi_type_create_f90_integer(range(1_shortInt), MPI_SHORTINT, ierr) call mpi_type_commit(MPI_SHORTINT, ierr) -#else - worldSize = 1 - rank = 0 + ! Define MPI type for LONGINT + call mpi_type_create_f90_integer(range(1_longInt), MPI_LONGINT, ierr) + call mpi_type_commit(MPI_LONGINT, ierr) + + ! Define MPI type for particleState + ! Note that particleState has stateSize = 13 attributes; if an attribute is + ! added to particleState, it had to be added here too + stateSize = 13 + allocate(displacements(stateSize), blockLengths(stateSize), types(stateSize)) + + ! Create arrays with dimension and type of each property of particleStateDummy + blockLengths = (/1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/) + types = (/MPI_DEFREAL, MPI_DEFREAL, MPI_DEFREAL, MPI_DEFREAL, MPI_SHORTINT, MPI_LOGICAL, MPI_SHORTINT, & + MPI_DEFREAL, MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT/) + + ! Create array of memory byte displacements + call mpi_get_address(state % wgt, displacements(1), ierr) + call mpi_get_address(state % r, displacements(2), ierr) + call mpi_get_address(state % dir, displacements(3), ierr) + call mpi_get_address(state % E, displacements(4), ierr) + call mpi_get_address(state % G, displacements(5), ierr) + call mpi_get_address(state % isMG, displacements(6), ierr) + call mpi_get_address(state % type, displacements(7), ierr) + call mpi_get_address(state % time, displacements(8), ierr) + call mpi_get_address(state % matIdx, displacements(9), ierr) + call mpi_get_address(state % cellIdx, displacements(10), ierr) + call mpi_get_address(state % uniqueID, displacements(11), ierr) + call mpi_get_address(state % collisionN, displacements(12), ierr) + call mpi_get_address(state % broodID, displacements(13), ierr) + displacements = displacements - displacements(1) + + ! Define new type + call mpi_type_create_struct(stateSize, blockLengths, displacements, types, MPI_PARTICLE_STATE, ierr) + call mpi_type_commit(MPI_PARTICLE_STATE, ierr) + #endif + end subroutine mpiInit !! @@ -101,7 +160,6 @@ function getOffset(N) result(offset) end function getOffset - !! !! Get MPI world size !! @@ -110,7 +168,9 @@ end function getOffset !! function getMPIWorldSize() result(size) integer(shortInt) :: size + size = worldSize + end function getMPIWorldSize !! @@ -133,7 +193,10 @@ end function isMPIMaster !! function getMPIRank() result(r) integer(shortInt) :: r + r = rank + end function getMPIRank + end module mpi_func diff --git a/Tallies/TallyClerks/centreOfMassClerk_class.f90 b/Tallies/TallyClerks/centreOfMassClerk_class.f90 index f86c12207..8f43e6a5b 100644 --- a/Tallies/TallyClerks/centreOfMassClerk_class.f90 +++ b/Tallies/TallyClerks/centreOfMassClerk_class.f90 @@ -114,7 +114,7 @@ subroutine reportCycleEnd(self, end, mem) ! 
Loop through population, scoring probabilities do i = 1,end % popSize() - associate( state => end % get(i) ) + associate(state => end % get(i)) self % value(cc,:) = self % value(cc,:) + state % wgt * state % r end associate end do @@ -181,7 +181,7 @@ end subroutine print elemental subroutine kill(self) class(centreOfMassClerk), intent(inout) :: self - if(allocated(self % value)) deallocate(self % value) + if (allocated(self % value)) deallocate(self % value) self % currentCycle = 0 self % maxCycles = 0 diff --git a/Tallies/TallyClerks/dancoffBellClerk_class.f90 b/Tallies/TallyClerks/dancoffBellClerk_class.f90 index f1f85eaf7..8f269b70c 100644 --- a/Tallies/TallyClerks/dancoffBellClerk_class.f90 +++ b/Tallies/TallyClerks/dancoffBellClerk_class.f90 @@ -194,28 +194,28 @@ subroutine reportTrans(self, p, xsData, mem) ! Find start material type; Exit if not fuel T_start = self % materialSet % getOrDefault(p % preTransition % matIdx, OUTSIDE) - if( T_start /= FUEL) return + if (T_start /= FUEL) return ! Exit if outside energy range state = p - if(.not.self % filter % isPass(state)) return + if (.not.self % filter % isPass(state)) return ! Find end material type; Exit if not fuel or moderator T_end = self % materialSet % getOrDefault(p % matIdx(), OUTSIDE) - if(T_end == OUTSIDE) return + if (T_end == OUTSIDE) return ! Obtain starting and ending weights w_end = p % w ! Add to approperiate bins - select case(T_end) - case(MODERATOR) + select case (T_end) + case (MODERATOR) ! Get XS SigmaTot = xsData % getTotalMatXS(p, p % preTransition % matIdx) call mem % score(w_end * SigmaTot, self % getMemAddress() + ESC_PROB_TOTXS) - case(FUEL) + case (FUEL) call mem % score(w_end, self % getMemAddress() + STAY_PROB) case default @@ -236,11 +236,10 @@ subroutine reportCycleEnd(self, end, mem) type(scoreMemory), intent(inout) :: mem real(defReal) :: escSigmaT, fuelWgt - if( mem % lastCycle() ) then + if (mem % lastCycle()) then escSigmaT = mem % getScore(self % getMemAddress() + ESC_PROB_TOTXS) fuelWgt = mem % getScore(self % getMemAddress() + STAY_PROB) - print *, escSigmaT, fuelWgt - call mem % accumulate( escSigmaT / fuelWgt, self % getMemAddress() + D_EFF) + call mem % accumulate(escSigmaT / fuelWgt, self % getMemAddress() + D_EFF) end if end subroutine reportCycleEnd diff --git a/Tallies/TallyClerks/keffAnalogClerk_class.f90 b/Tallies/TallyClerks/keffAnalogClerk_class.f90 index c4a4df264..ce79a75e5 100644 --- a/Tallies/TallyClerks/keffAnalogClerk_class.f90 +++ b/Tallies/TallyClerks/keffAnalogClerk_class.f90 @@ -14,6 +14,11 @@ module keffAnalogClerk_class use tallyResult_class, only : tallyResult, tallyResultEmpty use tallyClerk_inter, only : tallyClerk, kill_super => kill +#ifdef MPI + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, & + MASTER_RANK, isMPIMaster +#endif + implicit none private @@ -135,10 +140,18 @@ subroutine reportCycleStart(self, start, mem) class(keffAnalogClerk), intent(inout) :: self class(particleDungeon), intent(in) :: start type(scoreMemory), intent(inout) :: mem +#ifdef MPI + integer(shortInt) :: error + real(defReal) :: buffer +#endif ! 
Update start population weight self % startPopWgt = self % startPopWgt + start % popWeight() +#ifdef MPI + call mpi_reduce(self % startPopWgt, buffer, 1, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + if (isMPIMaster()) self % startPopWgt = buffer +#endif end subroutine reportCycleStart @@ -152,20 +165,34 @@ subroutine reportCycleEnd(self, end, mem) class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem real(defReal) :: k_norm, k_eff +#ifdef MPI + integer(shortInt) :: error + real(defReal) :: buffer +#endif ! Update end population weight self % endPopWgt = self % endPopWgt + end % popWeight() +#ifdef MPI + call mpi_reduce(self % endPopWgt, buffer, 1, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + if (isMPIMaster()) then + self % endPopWgt = buffer + else + self % endPopWgt = ZERO + end if +#endif + ! Close batch - if( mem % lastCycle() ) then + if (mem % lastCycle()) then k_norm = end % k_eff ! Calculate and score analog estimate of k-eff k_eff = self % endPopWgt / self % startPopWgt * k_norm - call mem % accumulate(k_eff, self % getMemAddress() ) + call mem % accumulate(k_eff, self % getMemAddress()) self % startPopWgt = ZERO self % endPopWgt = ZERO + end if end subroutine reportCycleEnd diff --git a/Tallies/TallyClerks/keffImplicitClerk_class.f90 b/Tallies/TallyClerks/keffImplicitClerk_class.f90 index 7ff26dd76..3d4c96f9f 100644 --- a/Tallies/TallyClerks/keffImplicitClerk_class.f90 +++ b/Tallies/TallyClerks/keffImplicitClerk_class.f90 @@ -3,7 +3,7 @@ module keffImplicitClerk_class use numPrecision use tallyCodes use endfConstants - use universalVariables, only : MAX_COL + use universalVariables, only : VOID_MAT, TRACKING_XS, MAX_COL use genericProcedures, only : fatalError, charCmp use display_func, only : statusMsg use dictionary_class, only : dictionary diff --git a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 index 9202f6418..9a941e5e5 100644 --- a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 +++ b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 @@ -187,7 +187,7 @@ subroutine print(self, outFile, mem) call outFile % startArray(name, [self % maxCycles]) - do i=1,self % maxCycles + do i = 1,self % maxCycles call outFile % addValue(self % value(i)) end do call outFile % endArray() @@ -202,9 +202,9 @@ end subroutine print elemental subroutine kill(self) class(shannonEntropyClerk), intent(inout) :: self - if(allocated(self % map)) deallocate(self % map) - if(allocated(self % prob)) deallocate(self % prob) - if(allocated(self % value)) deallocate(self % value) + if (allocated(self % map)) deallocate(self % map) + if (allocated(self % prob)) deallocate(self % prob) + if (allocated(self % value)) deallocate(self % value) self % N = 0 self % currentCycle = 0 self % maxCycles = 0 diff --git a/Tallies/TallyClerks/trackClerk_class.f90 b/Tallies/TallyClerks/trackClerk_class.f90 index 6b624da31..f94ffb7e4 100644 --- a/Tallies/TallyClerks/trackClerk_class.f90 +++ b/Tallies/TallyClerks/trackClerk_class.f90 @@ -98,12 +98,12 @@ subroutine init(self, dict, name) call self % setName(name) ! Load filetr - if( dict % isPresent('filter')) then + if (dict % isPresent('filter')) then call new_tallyFilter(self % filter, dict % getDictPtr('filter')) end if ! Load map - if( dict % isPresent('map')) then + if (dict % isPresent('map')) then call new_tallyMap(self % map, dict % getDictPtr('map')) end if @@ -112,7 +112,7 @@ subroutine init(self, dict, name) ! 
Load responses allocate(self % response(size(responseNames))) - do i=1, size(responseNames) + do i = 1, size(responseNames) call self % response(i) % init(dict % getDictPtr( responseNames(i) )) end do @@ -131,18 +131,18 @@ elemental subroutine kill(self) call kill_super(self) ! Kill and deallocate filter - if(allocated(self % filter)) then + if (allocated(self % filter)) then deallocate(self % filter) end if ! Kill and deallocate map - if(allocated(self % map)) then + if (allocated(self % map)) then call self % map % kill() deallocate(self % map) end if ! Kill and deallocate responses - if(allocated(self % response)) then + if (allocated(self % response)) then deallocate(self % response) end if @@ -173,7 +173,7 @@ elemental function getSize(self) result(S) integer(shortInt) :: S S = size(self % response) - if(allocated(self % map)) S = S * self % map % bins(0) + if (allocated(self % map)) S = S * self % map % bins(0) end function getSize @@ -199,12 +199,12 @@ subroutine reportPath(self, p, L, xsData,mem) state = p % prePath ! Check if within filter - if(allocated( self % filter)) then - if(self % filter % isFail(state)) return + if (allocated( self % filter)) then + if (self % filter % isFail(state)) return end if ! Find bin index - if(allocated(self % map)) then + if (allocated(self % map)) then binIdx = self % map % map(state) else binIdx = 1 @@ -224,7 +224,7 @@ subroutine reportPath(self, p, L, xsData,mem) flx = L ! Append all bins - do i=1,self % width + do i = 1,self % width scoreVal = self % response(i) % get(pTmp, xsData) * p % w * flx call mem % score(scoreVal, adrr + i) end do diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index b835d41bd..4e98be7dd 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -2,7 +2,8 @@ module scoreMemory_class use numPrecision #ifdef MPI - use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK, isMPIMaster, MPI_SHORTINT + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, & + MASTER_RANK, isMPIMaster, MPI_SHORTINT #endif use universalVariables, only : array_pad use genericProcedures, only : fatalError, numToChar @@ -136,13 +137,13 @@ subroutine init(self, N, id, batchSize, reduced) character(100), parameter :: Here= 'init (scoreMemory_class.f90)' ! Allocate space and zero all bins - allocate( self % bins(N, DIM2)) + allocate(self % bins(N, DIM2)) self % bins = ZERO self % nThreads = ompGetMaxThreads() ! Note the array padding to avoid false sharing - allocate( self % parallelBins(N + array_pad, self % nThreads)) + allocate(self % parallelBins(N + array_pad, self % nThreads)) self % parallelBins = ZERO ! Save size of memory @@ -156,8 +157,8 @@ subroutine init(self, N, id, batchSize, reduced) self % cycles = 0 self % batchSize = 1 - if(present(batchSize)) then - if(batchSize > 0) then + if (present(batchSize)) then + if (batchSize > 0) then self % batchSize = batchSize else call fatalError(Here,'Batch Size of: '// numToChar(batchSize) //' is invalid') @@ -291,7 +292,7 @@ subroutine closeCycle(self, normFactor) ! Increment Cycle Counter self % cycles = self % cycles + 1 - if(mod(self % cycles, self % batchSize) == 0) then ! Close Batch + if (mod(self % cycles, self % batchSize) == 0) then ! Close Batch !$omp parallel do do i = 1, self % N @@ -328,7 +329,7 @@ subroutine closeBin(self, normFactor, idx) character(100),parameter :: Here = 'closeBin (scoreMemory_class.f90)' ! Verify bounds for the index - if( idx < 0_longInt .or. 
idx > self % N) then + if (idx < 0_longInt .or. idx > self % N) then call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & & memory with size '//numToChar(self % N)) end if @@ -404,7 +405,7 @@ end subroutine reduceBins !! !! Reduce the accumulated results (csum and csum2) from different MPI processes !! - !! If the bins are `reduced` that is scores are collected at the end of each cycle, + !! If the bins are `reduced` (that is, scores are collected at the end of each cycle), !! then this subroutine does nothing. Otherwise it collects the results from different !! processes and stores them in the master process. !! @@ -449,7 +450,7 @@ end subroutine collectDistributed !! !! Result: !! The sum of the data across all processes in stored on master process `data` - !! The buffer is set to ZERO on all processes ( only 1:size(data) range)! + !! The buffer is set to ZERO on all processes (only 1:size(data) range)! !! !! Errors: !! fatalError if size of the buffer is insufficient @@ -461,11 +462,12 @@ subroutine reduceArray(data, buffer) integer(longInt) :: N, chunk, start integer(shortInt) :: error character(100),parameter :: Here = 'reduceArray (scoreMemory_class.f90)' + ! We need to be careful to support sizes larger than 2^31 - N = size(data, kind=longInt) + N = size(data, kind = longInt) ! Check if the buffer is large enough - if (size(buffer, kind=longInt) < N) then + if (size(buffer, kind = longInt) < N) then call fatalError(Here, 'Buffer is too small to store the reduced data') end if @@ -475,8 +477,8 @@ subroutine reduceArray(data, buffer) start = 1 do while (start <= N) - chunk = min(N - start + 1, int(huge(1_shortInt), longInt)) + chunk = min(N - start + 1, int(huge(1_shortInt), longInt)) call mpi_reduce(data(start : start + chunk - 1), & buffer(start : start + chunk - 1), & int(chunk, shortInt), & @@ -485,7 +487,9 @@ subroutine reduceArray(data, buffer) MASTER_RANK, & MPI_COMM_WORLD, & error) + start = start + chunk + end do ! Copy the result back to data @@ -512,14 +516,14 @@ elemental subroutine getResult_withSTD(self, mean, STD, idx, samples) real(defReal) :: inv_N, inv_Nm1 !! Verify index. Return 0 if not present - if( idx < 0_longInt .or. idx > self % N) then + if (idx < 0_longInt .or. idx > self % N) then mean = ZERO STD = ZERO return end if ! Check if # of samples is provided - if( present(samples)) then + if (present(samples)) then N = samples else N = self % batchN @@ -530,7 +534,7 @@ elemental subroutine getResult_withSTD(self, mean, STD, idx, samples) ! Calculate STD inv_N = ONE / N - if( N /= 1) then + if (N /= 1) then inv_Nm1 = ONE / (N - 1) else inv_Nm1 = ONE @@ -579,7 +583,7 @@ elemental function getScore(self, idx) result (score) integer(longInt), intent(in) :: idx real(defReal) :: score - if(idx <= 0_longInt .or. idx > self % N) then + if (idx <= 0_longInt .or. idx > self % N) then score = ZERO else score = self % bins(idx, BIN) diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90 index 063bef142..bbeb232e9 100644 --- a/Tallies/tallyAdmin_class.f90 +++ b/Tallies/tallyAdmin_class.f90 @@ -58,6 +58,7 @@ module tallyAdmin_class !! cycleEndClerks -> List of indices of all clerks that require cycleEndReport !! displayList -> List of indices of all clerks that are registered for display !! mem -> Score Memory for all defined clerks + !! mpiSync -> !! !! Interface: !! init -> Initialise from dictionary @@ -191,30 +192,30 @@ subroutine init(self,dict) allocate(self % tallyClerks(size(names))) ! 
Load clerks into slots and clerk names into map - do i=1,size(names) + do i = 1,size(names) call self % tallyClerks(i) % init(dict % getDictPtr(names(i)), names(i)) call self % clerksNameMap % add(names(i),i) end do ! Register all clerks to recive their reports - do i=1,size(self % tallyClerks) + do i = 1,size(self % tallyClerks) associate( reports => self % tallyClerks(i) % validReports() ) - do j=1,size(reports) + do j = 1,size(reports) call self % addToReports(reports(j), i) - end do end associate end do ! Obtain names of clerks to display - if( dict % isPresent('display')) then + if (dict % isPresent('display')) then call dict % get(names,'display') ! Register all clerks to display - do i=1,size(names) - call self % displayList % add( self % clerksNameMap % get(names(i))) + do i = 1,size(names) + call self % displayList % add(self % clerksNameMap % get(names(i))) end do + end if ! Read batching size @@ -231,19 +232,19 @@ subroutine init(self,dict) ! Assign memory locations to the clerks memLoc = 2 - do i=1,size(self % tallyClerks) + do i = 1,size(self % tallyClerks) call self % tallyClerks(i) % setMemAddress(memLoc) memLoc = memLoc + self % tallyClerks(i) % getSize() end do ! Verify that final memLoc and memSize are consistant - if(memLoc - 1 /= memSize) then + if (memLoc - 1 /= memSize) then call fatalError(Here, 'Memory addressing failed.') end if ! Read name of normalisation clerks if present - if(dict % isPresent('norm')) then + if (dict % isPresent('norm')) then call dict % get(self % normClerkName, 'norm') call dict % get(self % normValue, 'normVal') i = self % clerksNameMap % get(self % normClerkName) @@ -387,12 +388,12 @@ recursive subroutine display(self) integer(shortInt) :: idx ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call display(self % atch) end if ! Go through all clerks marked as part of the display - do i=1,self % displayList % getSize() + do i = 1,self % displayList % getSize() idx = self % displayList % get(i) call self % tallyClerks(idx) % display(self % mem) @@ -443,7 +444,7 @@ subroutine print(self,output) call output % printValue(self % mem % getBatchSize(), name) ! Print Clerk results - do i=1,size(self % tallyClerks) + do i = 1,size(self % tallyClerks) call self % tallyClerks(i) % print(output, self % mem) end do @@ -471,7 +472,7 @@ recursive subroutine reportInColl(self, p, virtual) character(100), parameter :: Here = "reportInColl (tallyAdmin_class.f90)" ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportInColl(self % atch, p, virtual) end if @@ -479,7 +480,7 @@ recursive subroutine reportInColl(self, p, virtual) xsData => ndReg_get(p % getType(), where = Here) ! Go through all clerks that request the report - do i=1,self % inCollClerks % getSize() + do i = 1,self % inCollClerks % getSize() idx = self % inCollClerks % get(i) call self % tallyClerks(idx) % reportInColl(p, xsData, self % mem, virtual) @@ -511,7 +512,7 @@ recursive subroutine reportOutColl(self, p, MT, muL) character(100), parameter :: Here = "reportOutColl (tallyAdmin_class.f90)" ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportOutColl(self % atch, p, MT, muL) end if @@ -519,7 +520,7 @@ recursive subroutine reportOutColl(self, p, MT, muL) xsData => ndReg_get(p % getType(), where = Here) ! 
Go through all clerks that request the report - do i=1,self % outCollClerks % getSize() + do i = 1,self % outCollClerks % getSize() idx = self % outCollClerks % get(i) call self % tallyClerks(idx) % reportOutColl(p, MT, muL, xsData, self % mem) @@ -550,7 +551,7 @@ recursive subroutine reportPath(self, p, L) character(100), parameter :: Here = "reportPath (tallyAdmin_class.f90)" ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportPath(self % atch, p, L) end if @@ -558,7 +559,7 @@ recursive subroutine reportPath(self, p, L) xsData => ndReg_get(p % getType(), where = Here) ! Go through all clerks that request the report - do i=1,self % pathClerks % getSize() + do i = 1,self % pathClerks % getSize() idx = self % pathClerks % get(i) call self % tallyClerks(idx) % reportPath(p, L, xsData, self % mem) @@ -586,7 +587,7 @@ recursive subroutine reportTrans(self, p) character(100), parameter :: Here = "reportTrans (tallyAdmin_class.f90)" ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportTrans(self % atch, p) end if @@ -594,7 +595,7 @@ recursive subroutine reportTrans(self, p) xsData => ndReg_get(p % getType(), where = Here) ! Go through all clerks that request the report - do i=1,self % transClerks % getSize() + do i = 1,self % transClerks % getSize() idx = self % transClerks % get(i) call self % tallyClerks(idx) % reportTrans(p, xsData, self % mem) @@ -627,7 +628,7 @@ recursive subroutine reportSpawn(self, MT, pOld, pNew) character(100), parameter :: Here = "reportSpwan (tallyAdmin_class.f90)" ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportSpawn(self % atch, MT, pOld, pNew) end if @@ -635,7 +636,7 @@ recursive subroutine reportSpawn(self, MT, pOld, pNew) xsData => ndReg_get(pOld % getType(), where = Here) ! Go through all clerks that request the report - do i=1,self % spawnClerks % getSize() + do i = 1,self % spawnClerks % getSize() idx = self % spawnClerks % get(i) call self % tallyClerks(idx) % reportSpawn(MT, pOld, pNew, xsData, self % mem) end do @@ -662,7 +663,7 @@ recursive subroutine reportHist(self, p) character(100), parameter :: Here = "reportHist (tallyAdmin_class.f90)" ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportHist(self % atch, p) end if @@ -670,7 +671,7 @@ recursive subroutine reportHist(self, p) xsData => ndReg_get(p % getType(), where = Here) ! Go through all clerks that request the report - do i=1,self % histClerks % getSize() + do i = 1,self % histClerks % getSize() idx = self % histClerks % get(i) call self % tallyClerks(idx) % reportHist(p, xsData, self % mem) @@ -704,13 +705,13 @@ recursive subroutine reportCycleStart(self, start) call self % mem % score(start % popWeight(), 1_longInt) ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportCycleStart(self % atch, start) end if ! Go through all clerks that request the report !$omp parallel do - do i=1,self % cycleStartClerks % getSize() + do i = 1,self % cycleStartClerks % getSize() idx = self % cycleStartClerks % get(i) call self % tallyClerks(idx) % reportCycleStart(start, self % mem) end do @@ -725,8 +726,8 @@ end subroutine reportCycleStart !! All particles given in "reportCycleStart" have been already transported !! It is called after "reportCycleStart" !! No modification or normalisation was applied to "end" particle Dungeon - !! 
"k_eff" member of end is set to criticality used to adjust fission source (implicit - !! fission site generation) + !! "k_eff" member of end is set to criticality used to adjust fission source + !! (implicit fission site generation) !! !! Args: !! end [in] -> Particle Dungeon at the end of a cycle (before any normalisations) @@ -734,7 +735,7 @@ end subroutine reportCycleStart !! Errors: !! None !! - recursive subroutine reportCycleEnd(self,end) + recursive subroutine reportCycleEnd(self, end) class(tallyAdmin), intent(inout) :: self class(particleDungeon), intent(in) :: end integer(shortInt) :: i @@ -744,24 +745,24 @@ recursive subroutine reportCycleEnd(self,end) !$omp threadprivate(idx) ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call reportCycleEnd(self % atch, end) end if ! Reduce the scores across the threads and processes call self % mem % reduceBins() - if (isMPIMaster() .or. .not. self % mpiSync ) then + if (isMPIMaster() .or. .not. self % mpiSync) then ! Go through all clerks that request the report !$omp parallel do - do i=1,self % cycleEndClerks % getSize() + do i = 1,self % cycleEndClerks % getSize() idx = self % cycleEndClerks % get(i) call self % tallyClerks(idx) % reportCycleEnd(end, self % mem) end do !$omp end parallel do ! Calculate normalisation factor - if( self % normBInAddr /= NO_NORM ) then + if (self % normBInAddr /= NO_NORM) then normScore = self % mem % getScore(self % normBinAddr) if (normScore == ZERO) then call fatalError(Here, 'Normalisation score from clerk:' // self % normClerkName // 'is 0') @@ -773,8 +774,9 @@ recursive subroutine reportCycleEnd(self,end) normFactor = ONE end if - ! Close cycle multipling all scores by multiplication factor + ! Close cycle multipling all scores by a multiplication factor call self % mem % closeCycle(normFactor) + end if end subroutine reportCycleEnd @@ -798,7 +800,7 @@ pure subroutine getResult(self, res, name) integer(shortInt),parameter :: NOT_PRESENT = -3 ! Deallocate if allocated result - if(allocated(res)) deallocate(res) + if (allocated(res)) deallocate(res) ! Copy name to character with nameLen name_loc = name @@ -806,7 +808,7 @@ pure subroutine getResult(self, res, name) ! Find clerk index idx = self % clerksNameMap % getOrDefault(name_loc, NOT_PRESENT) - if(idx == NOT_PRESENT) then ! Return empty result + if (idx == NOT_PRESENT) then ! Return empty result allocate(res, source = tallyResultEmpty() ) else ! Return result from the clerk named == name @@ -823,7 +825,7 @@ recursive subroutine collectDistributed(self) class(tallyAdmin), intent(inout) :: self ! Call attachment - if(associated(self % atch)) then + if (associated(self % atch)) then call collectDistributed(self % atch) end if From d5d1853e8384e52617c0090072266291d4d67a78 Mon Sep 17 00:00:00 2001 From: "V. 
Raffuzzi" Date: Wed, 18 Sep 2024 17:37:59 +0100 Subject: [PATCH 23/27] Adjust tallies to be compatible with MPI --- Apps/scone.f90 | 2 + PhysicsPackages/eigenPhysicsPackage_class.f90 | 2 + .../Tests/collisionProbabilityClerk_test.f90 | 13 +-- .../Tests/keffAnalogClerk_test.f90 | 36 ++++--- .../Tests/keffImplicitClerk_test.f90 | 4 +- .../Tests/shannonEntropyClerk_test.f90 | 27 ++++-- .../TallyClerks/Tests/simpleFMClerk_test.f90 | 22 ++--- .../TallyClerks/centreOfMassClerk_class.f90 | 73 ++++++++++---- .../collisionProbabilityClerk_class.f90 | 11 ++- .../TallyClerks/dancoffBellClerk_class.f90 | 11 +-- Tallies/TallyClerks/keffAnalogClerk_class.f90 | 96 ++++++++----------- .../TallyClerks/keffImplicitClerk_class.f90 | 10 +- .../TallyClerks/shannonEntropyClerk_class.f90 | 89 ++++++++++++----- Tallies/TallyClerks/simpleFMClerk_class.f90 | 68 +++++++------ Tallies/TallyClerks/tallyClerkSlot_class.f90 | 16 ++++ Tallies/TallyClerks/tallyClerk_inter.f90 | 27 +++++- Tallies/scoreMemory_class.f90 | 58 +++++------ Tallies/tallyAdmin_class.f90 | 38 +++++--- Tallies/tallyCodes.f90 | 3 +- 19 files changed, 373 insertions(+), 233 deletions(-) diff --git a/Apps/scone.f90 b/Apps/scone.f90 index 5b8cf96ac..b85f8a9bc 100644 --- a/Apps/scone.f90 +++ b/Apps/scone.f90 @@ -13,6 +13,7 @@ program scone use timer_mod , only : registerTimer, timerStart, timerStop, timerTime, secToChar implicit none + type(dictionary) :: input class(physicsPackage),allocatable :: core character(:),allocatable :: inputPath @@ -65,4 +66,5 @@ program scone call statusMsg('Total calculation time: ' // trim(secToChar(timerTime(timerIdx)))) call statusMsg('Have a good day and enjoy your result analysis!') + end program scone diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index ba7ef62f1..0df131bb6 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -561,12 +561,14 @@ subroutine init(self, dict) ! Initialise active and inactive tally attachments ! Inactive tally attachment + ! Note: mpiSync ensures that k_eff is synchronised between all processes each cycle call locDict1 % init(3) call locDict2 % init(2) call locDict2 % store('type','keffAnalogClerk') call locDict1 % store('keff', locDict2) call locDict1 % store('display',['keff']) + call locDict1 % store('mpiSync', 1) allocate(self % inactiveAtch) call self % inactiveAtch % init(locDict1) diff --git a/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 b/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 index dbd832b3d..9ed5f797a 100644 --- a/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/collisionProbabilityClerk_test.f90 @@ -39,7 +39,6 @@ subroutine setUp(this) call mapDict % store('type','testMap') call mapDict % store('maxIdx',2) - ! Build intput dictionary call dict % init(2) call dict % store('type','collisionProbabilityClerk') @@ -48,9 +47,9 @@ subroutine setUp(this) name = 'testClerk' call this % clerk % init(dict,name) - call mapDict % kill() call dict % kill() + end subroutine setUp !! @@ -82,9 +81,8 @@ subroutine testSimpleUseCase(this) class(tallyResult), allocatable :: res real(defReal), parameter :: TOL = 1.0E-7 - ! Create score memory - call mem % init(int(this % clerk % getSize(), longInt) , 1, batchSize = 1) + call mem % init(int(this % clerk % getSize(), longInt), 1, batchSize = 1) call this % clerk % setMemAddress(1_longInt) ! 
Create test transport Nuclear Data @@ -132,11 +130,10 @@ subroutine testSimpleUseCase(this) p % preCollision % matIdx = 88 call this % clerk % reportInColl(p, xsData, mem, .false.) - ! Close cycle call mem % reduceBins() - call this % clerk % reportCycleEnd(pop, mem) - call mem % closeCycle(ONE) + call this % clerk % closeCycle(pop, mem) + call mem % closeCycle(TWO) ! Verify results @@ -248,8 +245,6 @@ subroutine testSimpleUseCase(this) end subroutine testSimpleUseCase - - !! !! Test correctness of the printing calls !! diff --git a/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 b/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 index c98afff1c..37dd25722 100644 --- a/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 @@ -65,8 +65,8 @@ subroutine test1CycleBatch(this) ! Initialise objects call pit % init(4) - call mem % init(2_longInt,1) - call this % clerk % setMemAddress(1_longInt) + call mem % init(3_longInt, 1) + call this % clerk % setMemAddress(1_longInt) ! Start cycle 1 p % w = 1000.0_defReal @@ -79,7 +79,9 @@ subroutine test1CycleBatch(this) call pit % detain(p) pit % k_eff = ONE - call this % clerk % reportCycleEnd(pit,mem) + call this % clerk % reportCycleEnd(pit, mem) + call mem % reduceBins() + call this % clerk % closeCycle(pit, mem) call mem % closeCycle(0.8_defReal) ! Start cycle 2 @@ -94,14 +96,15 @@ subroutine test1CycleBatch(this) call pit % detain(p) pit % k_eff = 1.2_defReal - call mem % reduceBins() call this % clerk % reportCycleEnd(pit, mem) + call mem % reduceBins() + call this % clerk % closeCycle(pit, mem) call mem % closeCycle(0.8_defReal) ! Validate results ! Directly from memory - call mem % getResult(k, STD, 1_longInt) + call mem % getResult(k, STD, 3_longInt) @assertEqual(1.1400_defReal, k, TOL, '1 Cycle Batch, keff from memory:') @assertEqual(0.0600_defReal, STD, TOL, '1 Cycle Batch, keff STD from memory:') @@ -136,8 +139,8 @@ subroutine test2CycleBatch(this) ! Initialise objects call pit % init(4) - call mem % init(2_longInt,1, batchSize = 2 ) - call this % clerk % setMemAddress(1_longInt) + call mem % init(3_longInt, 1, batchSize = 2) + call this % clerk % setMemAddress(1_longInt) ! Start cycle 1 p % w = 500.0_defReal @@ -150,7 +153,8 @@ subroutine test2CycleBatch(this) call pit % detain(p) pit % k_eff = ONE - call this % clerk % reportCycleEnd(pit,mem) + call this % clerk % reportCycleEnd(pit, mem) + call this % clerk % closeCycle(pit, mem) call mem % closeCycle(0.8_defReal) ! Start cycle 2 @@ -165,7 +169,9 @@ subroutine test2CycleBatch(this) call pit % detain(p) pit % k_eff = ONE - call this % clerk % reportCycleEnd(pit,mem) + call this % clerk % reportCycleEnd(pit, mem) + call mem % reduceBins() + call this % clerk % closeCycle(pit, mem) call mem % closeCycle(0.8_defReal) ! Start cycle 3 @@ -181,6 +187,7 @@ subroutine test2CycleBatch(this) pit % k_eff = 1.2_defReal call this % clerk % reportCycleEnd(pit,mem) + call this % clerk % closeCycle(pit,mem) call mem % closeCycle(0.8_defReal) ! Start cycle 4 @@ -196,13 +203,14 @@ subroutine test2CycleBatch(this) pit % k_eff = 1.2_defReal call this % clerk % reportCycleEnd(pit,mem) + call mem % reduceBins() + call this % clerk % closeCycle(pit,mem) call mem % closeCycle(0.8_defReal) - ! Validate results ! 
Directly from memory - call mem % getResult(k, STD, 1_longInt) + call mem % getResult(k, STD, 3_longInt) @assertEqual(1.1400_defReal, k, TOL, '1 Cycle Batch, keff from memory:') @assertEqual(0.0600_defReal, STD, TOL, '1 Cycle Batch, keff STD from memory:') @@ -231,11 +239,11 @@ subroutine testMisc(this) type(scoreMemory) :: mem ! Initialise objects - call mem % init(2_longInt,1) - call this % clerk % setMemAddress(1_longInt) + call mem % init(2_longInt, 1) + call this % clerk % setMemAddress(1_longInt) ! Test getting size - @assertEqual(1, this % clerk % getSize(),'Test getSize() :') + @assertEqual(3, this % clerk % getSize(), 'Test getSize() :') ! Test output printing correctness call out % init('dummyPrinter', fatalErrors = .false.) diff --git a/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 b/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 index 0b518f897..6bf598d8d 100644 --- a/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/keffImplicitClerk_test.f90 @@ -96,7 +96,7 @@ subroutine test1CycleBatch(this) ! End cycle call mem % reduceBins() call pit % detain(p) - call this % clerk % reportCycleEnd(pit, mem) + call this % clerk % closeCycle(pit, mem) call pit % release(p) call mem % closeCycle(ONE) @@ -116,7 +116,7 @@ subroutine test1CycleBatch(this) ! End cycle call mem % reduceBins() call pit % detain(p) - call this % clerk % reportCycleEnd(pit, mem) + call this % clerk % closeCycle(pit, mem) call pit % release(p) call mem % closeCycle(ONE) diff --git a/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 b/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 index 6a8fea657..da64d9889 100644 --- a/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 @@ -49,6 +49,7 @@ subroutine setUp(this) call mapDict % kill() call dict % kill() + end subroutine setUp !! @@ -65,7 +66,6 @@ end subroutine tearDown !! PROPER TESTS BEGIN HERE !!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - !! !! Test correctness in a simple use case !! @@ -75,10 +75,12 @@ subroutine testSimpleUseCase(this) type(scoreMemory) :: mem type(particleState) :: phase type(particleDungeon) :: pop + real(defReal) :: val + integer(longInt) :: idx real(defReal), parameter :: TOL = 1.0E-7 ! Create score memory - call mem % init(int(this % clerk % getSize(), longInt) , 1, batchSize = 1) + call mem % init(int(this % clerk % getSize(), longInt), 1, batchSize = 1) call this % clerk % setMemAddress(1_longInt) ! Crate dungeon of original events @@ -94,12 +96,16 @@ subroutine testSimpleUseCase(this) call pop % detain(phase) call this % clerk % reportCycleEnd(pop, mem) + call mem % reduceBins() + call this % clerk % closeCycle(pop, mem) ! Close cycle call mem % closeCycle(ONE) ! Verify results for uniform distribution - @assertEqual(ONE, this % clerk % value(1), TOL) + idx = this % clerk % getMemAddress() + 2 + call mem % getResult(val, idx, samples = 1) + @assertEqual(ONE, val, TOL) ! Move particles to the same bin call pop % kill() @@ -107,20 +113,22 @@ subroutine testSimpleUseCase(this) call pop % detain(phase) call pop % detain(phase) - call this % clerk % reportCycleEnd(pop, mem) + call this % clerk % reportCycleEnd(pop, mem) + call mem % reduceBins() + call this % clerk % closeCycle(pop, mem) ! Close cycle - call mem % reduceBins() call mem % closeCycle(ONE) - ! Verify results for all particles in one bine - @assertEqual(ZERO, this % clerk % value(2), TOL) + ! 
Verify results for all particles in one bin + idx = this % clerk % getMemAddress() + 3 + call mem % getResult(val, idx, samples = 1) + @assertEqual(ZERO, val, TOL) ! Clean call pop % kill() - end subroutine testSimpleUseCase - + end subroutine testSimpleUseCase !! !! Test correctness of the printing calls @@ -143,4 +151,5 @@ subroutine testPrintingCorrectness(this) end subroutine testPrintingCorrectness + end module shannonEntropyClerk_test diff --git a/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 b/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 index e28358dde..1bb26d6de 100644 --- a/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/simpleFMClerk_test.f90 @@ -39,7 +39,6 @@ subroutine setUp(this) call mapDict % store('type','testMap') call mapDict % store('maxIdx',3) - ! Build intput dictionary call dict % init(2) call dict % store('type','simpleFMClerk') @@ -48,9 +47,9 @@ subroutine setUp(this) name = 'testClerk' call this % clerk % init(dict,name) - call mapDict % kill() call dict % kill() + end subroutine setUp !! @@ -83,7 +82,6 @@ subroutine testSimpleUseCase(this) class(tallyResult), allocatable :: res real(defReal), parameter :: TOL = 1.0E-7 - ! Create score memory call mem % init(int(this % clerk % getSize(), longInt) , 1, batchSize = 1) call this % clerk % setMemAddress(1_longInt) @@ -123,33 +121,34 @@ subroutine testSimpleUseCase(this) p % preHistory % matIdx = 1 call this % clerk % reportInColl(p, xsData, mem, .false.) - call this % clerk % reportCycleEnd(pop, mem) + call mem % reduceBins() + call this % clerk % closeCycle(pop, mem) ! Close cycle - call mem % reduceBins() call mem % closeCycle(ONE) ! Verify results ! Fission matrix + ! Result indexes start from 3_longInt ! 1 -> 1 Transition - call mem % getResult(val, 1_longInt) + call mem % getResult(val, 4_longInt) @assertEqual(1.818181818181_defReal ,val, TOL) ! 1 -> 2 Transition - call mem % getResult(val, 2_longInt) + call mem % getResult(val, 5_longInt) @assertEqual(ZERO, val, TOL) ! 1 -> 3 Transition - call mem % getResult(val, 3_longInt) + call mem % getResult(val, 6_longInt) @assertEqual(ZERO, val, TOL) ! 2 -> 1 Transition - call mem % getResult(val, 4_longInt) + call mem % getResult(val, 7_longInt) @assertEqual(2.0_defReal, val, TOL) ! 2 -> 2 Transition - call mem % getResult(val, 5_longInt) + call mem % getResult(val, 8_longInt) @assertEqual(1.27272727272727_defReal, val, TOL) ! Verify run-time result @@ -186,7 +185,7 @@ subroutine testSimpleUseCase(this) select type(res) class is (FMresult) ! 1 -> 1 Transition - @assertEqual(1.818181818181_defReal ,res % FM(1,1,1) , TOL) + @assertEqual(1.818181818181_defReal, res % FM(1,1,1), TOL) ! Change size of matrix res % N = 2 @@ -194,6 +193,7 @@ subroutine testSimpleUseCase(this) allocate(res % FM(2,2,1)) end select + ! Get result yet again. This time with wrong size call this % clerk % getResult(res, mem) diff --git a/Tallies/TallyClerks/centreOfMassClerk_class.f90 b/Tallies/TallyClerks/centreOfMassClerk_class.f90 index 8f43e6a5b..0b8a3269f 100644 --- a/Tallies/TallyClerks/centreOfMassClerk_class.f90 +++ b/Tallies/TallyClerks/centreOfMassClerk_class.f90 @@ -33,9 +33,8 @@ module centreOfMassClerk_class !! type, public, extends(tallyClerk) :: centreOfMassClerk private - real(defReal),dimension(:,:),allocatable :: value !! cycle-wise COM value - integer(shortInt) :: maxCycles = 0 !! Number of tally cycles - integer(shortInt) :: currentCycle = 0 !! track current cycle + integer(shortInt) :: maxCycles = 0 !! 
Number of tally cycles + integer(shortInt) :: currentCycle = 0 !! track current cycle contains ! Procedures used during build @@ -45,6 +44,7 @@ module centreOfMassClerk_class ! File reports and check status -> run-time procedures procedure :: reportCycleEnd + procedure :: closeCycle ! Output procedures procedure :: display @@ -52,6 +52,7 @@ module centreOfMassClerk_class ! Deconstructor procedure :: kill + end type centreOfMassClerk contains @@ -70,10 +71,6 @@ subroutine init(self, dict, name) ! Read number of cycles for which to track COM call dict % get(self % maxCycles, 'cycles') - ! Allocate space for storing value - allocate(self % value(self % maxCycles, 3)) - self % value = ZERO - end subroutine init !! @@ -83,7 +80,7 @@ function validReports(self) result(validCodes) class(centreOfMassClerk),intent(in) :: self integer(shortInt),dimension(:),allocatable :: validCodes - validCodes = [cycleEnd_Code] + validCodes = [cycleEnd_Code, closeCycle_CODE] end function validReports @@ -94,7 +91,7 @@ elemental function getSize(self) result(S) class(centreOfMassClerk), intent(in) :: self integer(shortInt) :: S - S = 3*self % maxCycles + S = 3 * self % maxCycles end function getSize @@ -105,7 +102,9 @@ subroutine reportCycleEnd(self, end, mem) class(centreOfMassClerk), intent(inout) :: self class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem - integer(shortInt) :: i, cc + integer(shortInt) :: i + integer(longInt) :: cc + real(defReal), dimension(3) :: val if ((self % currentCycle) < (self % maxCycles)) then @@ -113,18 +112,43 @@ subroutine reportCycleEnd(self, end, mem) cc = self % currentCycle ! Loop through population, scoring probabilities - do i = 1,end % popSize() + do i = 1, end % popSize() + associate(state => end % get(i)) - self % value(cc,:) = self % value(cc,:) + state % wgt * state % r + val = state % wgt * state % r / end % popWeight() + call mem % score(val(1), self % getMemAddress() + 3*(cc - 1)) + call mem % score(val(2), self % getMemAddress() + 3*(cc - 1) + 1) + call mem % score(val(3), self % getMemAddress() + 3*(cc - 1) + 2) end associate - end do - self % value(cc,:) = self % value(cc,:) / end % popWeight() + end do end if end subroutine reportCycleEnd + !! + !! Process cycle end + !! + subroutine closeCycle(self, end, mem) + class(centreOfMassClerk), intent(inout) :: self + class(particleDungeon), intent(in) :: end + type(scoreMemory), intent(inout) :: mem + integer(longInt) :: cc + + if ((self % currentCycle) < (self % maxCycles)) then + + cc = self % currentCycle + + ! Make sure results don't get normalised arbitrarily + call mem % closeBin(ONE, self % getMemAddress() + 3*(cc - 1)) + call mem % closeBin(ONE, self % getMemAddress() + 3*(cc - 1) + 1) + call mem % closeBin(ONE, self % getMemAddress() + 3*(cc - 1) + 2) + + end if + + end subroutine closeCycle + !! !! Display convergance progress on the console !! @@ -145,6 +169,8 @@ subroutine print(self, outFile, mem) type(scoreMemory), intent(in) :: mem integer(shortInt) :: i character(nameLen) :: name + integer(longInt) :: ccIdx + real(defReal) :: val ! Begin block call outFile % startBlock(self % getName()) @@ -152,22 +178,28 @@ subroutine print(self, outFile, mem) ! 
Print COM name = 'CoMx' call outFile % startArray(name, [self % maxCycles]) - do i=1,self % maxCycles - call outFile % addValue(self % value(i,1)) + do i = 1, self % maxCycles + ccIdx = self % getMemAddress() + 3*(i - 1) + call mem % getResult(val, ccIdx, samples = 1) + call outFile % addValue(val) end do call outFile % endArray() name = 'CoMy' call outFile % startArray(name, [self % maxCycles]) - do i=1,self % maxCycles - call outFile % addValue(self % value(i,2)) + do i = 1, self % maxCycles + ccIdx = self % getMemAddress() + 3*(i - 1) + 1 + call mem % getResult(val, ccIdx, samples = 1) + call outFile % addValue(val) end do call outFile % endArray() name = 'CoMz' call outFile % startArray(name, [self % maxCycles]) - do i=1,self % maxCycles - call outFile % addValue(self % value(i,3)) + do i = 1, self % maxCycles + ccIdx = self % getMemAddress() + 3*(i - 1) + 2 + call mem % getResult(val, ccIdx, samples = 1) + call outFile % addValue(val) end do call outFile % endArray() @@ -181,7 +213,6 @@ end subroutine print elemental subroutine kill(self) class(centreOfMassClerk), intent(inout) :: self - if (allocated(self % value)) deallocate(self % value) self % currentCycle = 0 self % maxCycles = 0 diff --git a/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 b/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 index 9350b2df6..c9debd62a 100644 --- a/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 +++ b/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 @@ -78,7 +78,7 @@ module collisionProbabilityClerk_class ! File reports and check status -> run-time procedures procedure :: reportInColl - procedure :: reportCycleEnd + procedure :: closeCycle ! Overwrite default run-time result procedure procedure :: getResult @@ -144,7 +144,7 @@ function validReports(self) result(validCodes) class(collisionProbabilityClerk),intent(in) :: self integer(shortInt),dimension(:),allocatable :: validCodes - validCodes = [inColl_CODE, cycleEnd_Code] + validCodes = [inColl_CODE, closeCycle_CODE] end function validReports @@ -208,7 +208,6 @@ subroutine reportInColl(self, p, xsData, mem, virtual) !score = self % resp % get(p, xsData) * p % w / xsData % getTotalMatXS(p, p % matIdx()) score = p % w - ! I think this is right but I need to double check! ! Score element of the matrix addr = self % getMemAddress() + sIdx * self % N + cIdx call mem % score(score, addr) @@ -220,7 +219,7 @@ end subroutine reportInColl !! !! See tallyClerk_inter for details !! - subroutine reportCycleEnd(self, end, mem) + subroutine closeCycle(self, end, mem) class(collisionProbabilityClerk), intent(inout) :: self class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem @@ -229,6 +228,7 @@ subroutine reportCycleEnd(self, end, mem) real(defReal) :: val, normFactor if (mem % lastCycle()) then + ! Set address to the start of collision probability matrix ! Decrease by 1 to get correct address on the first iteration of the loop addrCPM = self % getMemAddress() - 1 @@ -257,7 +257,7 @@ subroutine reportCycleEnd(self, end, mem) end do end if - end subroutine reportCycleEnd + end subroutine closeCycle !! !! Return result from the clerk for interaction with Physics Package @@ -322,6 +322,7 @@ pure subroutine getResult(self, res, mem) end do end select + end subroutine getResult !! 
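The clerk and test updates in this patch all follow one pattern: reportCycleEnd is reduced to posting raw scores, while the new closeCycle report performs the normalisation and accumulation that previously happened at cycle end, so that bins can first be reduced across OpenMP threads and MPI ranks. A minimal sketch of the per-cycle sequence, with clerk, pit and mem standing for a tallyClerk, a particleDungeon and a scoreMemory exactly as in the tests above (the production call order lives in tallyAdmin_class.f90 later in this patch):

    call clerk % reportCycleStart(pit, mem)  ! Post start-of-cycle scores
    ! ... transport loop, reportInColl / reportTrans etc. ...
    call clerk % reportCycleEnd(pit, mem)    ! Post end-of-cycle scores only
    call mem % reduceBins()                  ! Reduce bins over threads and MPI processes
    call clerk % closeCycle(pit, mem)        ! Normalise and accumulate estimates
    call mem % closeCycle(ONE)               ! Close the batch; argument is the normalisation factor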
diff --git a/Tallies/TallyClerks/dancoffBellClerk_class.f90 b/Tallies/TallyClerks/dancoffBellClerk_class.f90
index 8f269b70c..3b3ac7bae 100644
--- a/Tallies/TallyClerks/dancoffBellClerk_class.f90
+++ b/Tallies/TallyClerks/dancoffBellClerk_class.f90
@@ -77,7 +77,7 @@ module dancoffBellClerk_class

 ! File reports and check status -> run-time procedures
 procedure :: reportTrans
- procedure :: reportCycleEnd
+ procedure :: closeCycle

 ! Output procedures
 procedure :: display
@@ -158,7 +158,7 @@ function validReports(self) result(validCodes)
 class(dancoffBellClerk),intent(in) :: self
 integer(shortInt),dimension(:),allocatable :: validCodes

- validCodes = [trans_CODE, cycleEnd_CODE]
+ validCodes = [trans_CODE, closeCycle_CODE]

 end function validReports

@@ -189,7 +189,6 @@ subroutine reportTrans(self, p, xsData, mem)
 integer(shortInt) :: T_end, T_start
 real(defReal) :: w_end
 type(particleState) :: state
- character(100),parameter :: Here = 'reportTrans (dancoffBellClerk_class.f90)'

 ! Find start material type; Exit if not fuel
@@ -226,11 +225,11 @@ subroutine reportTrans(self, p, xsData, mem)
 end subroutine reportTrans

 !!
- !! Process end of the cycle
+ !! Close cycle
 !!
 !! See tallyClerk_inter for details
 !!
- subroutine reportCycleEnd(self, end, mem)
+ subroutine closeCycle(self, end, mem)
 class(dancoffBellClerk), intent(inout) :: self
 class(particleDungeon), intent(in) :: end
 type(scoreMemory), intent(inout) :: mem
@@ -242,7 +241,7 @@ subroutine reportCycleEnd(self, end, mem)
 call mem % accumulate(escSigmaT / fuelWgt, self % getMemAddress() + D_EFF)
 end if

- end subroutine reportCycleEnd
+ end subroutine closeCycle

 !!
 !! Display convergance progress on the console
diff --git a/Tallies/TallyClerks/keffAnalogClerk_class.f90 b/Tallies/TallyClerks/keffAnalogClerk_class.f90
index ce79a75e5..643765c36 100644
--- a/Tallies/TallyClerks/keffAnalogClerk_class.f90
+++ b/Tallies/TallyClerks/keffAnalogClerk_class.f90
@@ -14,14 +14,15 @@ module keffAnalogClerk_class
 use tallyResult_class, only : tallyResult, tallyResultEmpty
 use tallyClerk_inter, only : tallyClerk, kill_super => kill

-#ifdef MPI
- use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, &
- MASTER_RANK, isMPIMaster
-#endif
-
 implicit none
 private

+ !! Locations of different bins wrt memory address of the clerk
+ integer(shortInt), parameter :: MEM_SIZE = 3
+ integer(longInt), parameter :: START_POP = 0 ,& ! Population tally at the start of the cycle
+ END_POP = 1 ,& ! Population tally at the end of the cycle
+ K_EST = 2 ! k-eff estimate
+
 !!
 !! Simplest possible analog k-eff estimator that determines
 !! criticality by comparing population weight before and after transport cycle
@@ -38,9 +39,6 @@ module keffAnalogClerk_class
 !! }
 !!
 type, public,extends(tallyClerk) :: keffAnalogClerk
- private
- real(defReal) :: startPopWgt = ZERO
- real(defReal) :: endPopWgt = ZERO
 contains
 ! Procedures used during build
 procedure :: init
@@ -51,11 +49,13 @@ module keffAnalogClerk_class
 ! File reports and check status -> run-time procedures
 procedure :: reportCycleStart
 procedure :: reportCycleEnd
+ procedure :: closeCycle

 ! Output procedures
 procedure :: display
 procedure :: print
 procedure :: getResult
+
 end type keffAnalogClerk

 !!
 !! keff result class
 !!
 !! Public Members:
 !! keff -> Result, keff(1) is criticality, keff(2) is STD
 !!
- type,public, extends(tallyResult) :: keffResult + type, public, extends(tallyResult) :: keffResult real(defReal), dimension(2) :: keff = [ONE, ZERO] end type keffResult @@ -85,10 +85,6 @@ subroutine init(self, dict, name) ! Needs no settings, just load name call self % setName(name) - ! Ensure correct initialisation to default values - self % startPopWgt = ZERO - self % endPopWgt = ZERO - end subroutine init !! @@ -100,9 +96,6 @@ elemental subroutine kill(self) ! Superclass call kill_super(self) - self % startPopWgt = ZERO - self % endPopWgt = ZERO - end subroutine kill !! @@ -114,7 +107,7 @@ function validReports(self) result(validCodes) class(keffAnalogClerk),intent(in) :: self integer(shortInt),dimension(:),allocatable :: validCodes - validCodes = [ cycleStart_CODE, cycleEnd_CODE ] + validCodes = [cycleStart_CODE, cycleEnd_CODE, closeCycle_CODE] end function validReports @@ -127,7 +120,7 @@ elemental function getSize(self) result(S) class(keffAnalogClerk), intent(in) :: self integer(shortInt) :: S - S = 1 + S = MEM_SIZE end function getSize @@ -140,18 +133,9 @@ subroutine reportCycleStart(self, start, mem) class(keffAnalogClerk), intent(inout) :: self class(particleDungeon), intent(in) :: start type(scoreMemory), intent(inout) :: mem -#ifdef MPI - integer(shortInt) :: error - real(defReal) :: buffer -#endif - - ! Update start population weight - self % startPopWgt = self % startPopWgt + start % popWeight() -#ifdef MPI - call mpi_reduce(self % startPopWgt, buffer, 1, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) - if (isMPIMaster()) self % startPopWgt = buffer -#endif + ! Add score to counter + call mem % score(start % popWeight(), self % getMemAddress() + START_POP) end subroutine reportCycleStart @@ -164,38 +148,37 @@ subroutine reportCycleEnd(self, end, mem) class(keffAnalogClerk), intent(inout) :: self class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem - real(defReal) :: k_norm, k_eff -#ifdef MPI - integer(shortInt) :: error - real(defReal) :: buffer -#endif - - ! Update end population weight - self % endPopWgt = self % endPopWgt + end % popWeight() - -#ifdef MPI - call mpi_reduce(self % endPopWgt, buffer, 1, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) - if (isMPIMaster()) then - self % endPopWgt = buffer - else - self % endPopWgt = ZERO - end if -#endif + + ! Add score to counter + call mem % score(end % popWeight(), self % getMemAddress() + END_POP) + + end subroutine reportCycleEnd + + !! + !! Close the cycle + !! + !! See tallyClerk_inter for details + !! + subroutine closeCycle(self, end, mem) + class(keffAnalogClerk), intent(inout) :: self + class(particleDungeon), intent(in) :: end + type(scoreMemory), intent(inout) :: mem + real(defReal) :: k_norm, k_eff, startPopWgt, endPopWgt ! Close batch if (mem % lastCycle()) then k_norm = end % k_eff - ! Calculate and score analog estimate of k-eff - k_eff = self % endPopWgt / self % startPopWgt * k_norm - call mem % accumulate(k_eff, self % getMemAddress()) + startPopWgt = mem % getScore(self % getMemAddress() + START_POP) + endPopWgt = mem % getScore(self % getMemAddress() + END_POP) - self % startPopWgt = ZERO - self % endPopWgt = ZERO + ! Calculate and score analog estimate of k-eff + k_eff = endPopWgt / startPopWgt * k_norm + call mem % accumulate(k_eff, self % getMemAddress() + K_EST) end if - end subroutine reportCycleEnd + end subroutine closeCycle !! !! 
Display convergance progress on the console @@ -208,7 +191,7 @@ subroutine display(self, mem) real(defReal) :: k, STD character(MAX_COL) :: buffer - call mem % getResult(k, STD, self % getMemAddress()) + call mem % getResult(k, STD, self % getMemAddress() + K_EST) ! Print estimates to a console write(buffer, '(A,F8.5,A,F8.5)') 'k-eff (analog): ', k, ' +/- ', STD @@ -229,10 +212,10 @@ subroutine print(self, outFile, mem) character(nameLen) :: name ! Get result value - call mem % getResult(k, STD, self % getMemAddress()) + call mem % getResult(k, STD, self % getMemAddress() + K_EST) ! Print to output file - call outFile % startBlock(self % getName() ) + call outFile % startBlock(self % getName()) name = 'k_analog' call outFile % printResult(k, STD, name) call outFile % endBlock() @@ -252,10 +235,11 @@ pure subroutine getResult(self, res, mem) real(defReal) :: k, STD ! Get result value - call mem % getResult(k, STD, self % getMemAddress()) + call mem % getResult(k, STD, self % getMemAddress() + K_EST) allocate(res, source = keffResult([k, STD])) end subroutine getResult + end module keffAnalogClerk_class diff --git a/Tallies/TallyClerks/keffImplicitClerk_class.f90 b/Tallies/TallyClerks/keffImplicitClerk_class.f90 index 3d4c96f9f..b5416ac54 100644 --- a/Tallies/TallyClerks/keffImplicitClerk_class.f90 +++ b/Tallies/TallyClerks/keffImplicitClerk_class.f90 @@ -70,7 +70,7 @@ module keffImplicitClerk_class procedure :: reportInColl procedure :: reportOutColl procedure :: reportHist - procedure :: reportCycleEnd + procedure :: closeCycle procedure :: isConverged ! Output procedures @@ -135,7 +135,7 @@ function validReports(self) result(validCodes) class(keffImplicitClerk),intent(in) :: self integer(shortInt),dimension(:),allocatable :: validCodes - validCodes = [inColl_CODE, outColl_CODE, cycleEnd_CODE, hist_CODE] + validCodes = [inColl_CODE, outColl_CODE, hist_CODE, closeCycle_CODE] end function validReports @@ -269,7 +269,7 @@ end subroutine reportHist !! !! See tallyClerk_inter for details !! - subroutine reportCycleEnd(self, end, mem) + subroutine closeCycle(self, end, mem) class(keffImplicitClerk), intent(inout) :: self class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem @@ -277,6 +277,7 @@ subroutine reportCycleEnd(self, end, mem) real(defReal) :: nuFiss, absorb, leakage, scatterMul, k_est if (mem % lastCycle()) then + addr = self % getMemAddress() nuFiss = mem % getScore(addr + IMP_PROD) absorb = mem % getScore(addr + IMP_ABS) @@ -285,9 +286,10 @@ subroutine reportCycleEnd(self, end, mem) k_est = nuFiss / (absorb + leakage - scatterMul) call mem % accumulate(k_est, addr + K_EFF) + end if - end subroutine reportCycleEnd + end subroutine closeCycle !! !! Perform convergance check in the Clerk diff --git a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 index 9a941e5e5..9c92fef08 100644 --- a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 +++ b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 @@ -22,13 +22,16 @@ module shannonEntropyClerk_class !! !! Shannon entropy estimator + !! !! This is a prototype implementation of an implicit Shannon entropy estimator !! Takes cycle end reports to generate cycle-wise entropy !! Contains only a single map for discretisation + !! !! Scores Shannon entropy for a user-specified number of cycles !! - !! Notes: - !! -> + !! NOTE: when using multiple MPI processes, this will give wrong results unless + !! mpiSync is set to 1 for the tallyAdmin that contains this clerk + !! !! 
Sample dictionary input: !! !! clerkName { @@ -55,6 +58,7 @@ module shannonEntropyClerk_class ! File reports and check status -> run-time procedures procedure :: reportCycleEnd + procedure :: closeCycle ! Output procedures procedure :: display @@ -62,6 +66,7 @@ module shannonEntropyClerk_class ! Deconstructor procedure :: kill + end type shannonEntropyClerk contains @@ -103,7 +108,7 @@ function validReports(self) result(validCodes) class(shannonEntropyClerk),intent(in) :: self integer(shortInt),dimension(:),allocatable :: validCodes - validCodes = [cycleEnd_Code] + validCodes = [cycleEnd_Code, closeCycle_CODE] end function validReports @@ -114,11 +119,10 @@ elemental function getSize(self) result(S) class(shannonEntropyClerk), intent(in) :: self integer(shortInt) :: S - S = self % N + self % maxCycles + S = self % N + self % maxCycles + 1 end function getSize - !! !! Process cycle end !! @@ -126,37 +130,67 @@ subroutine reportCycleEnd(self, end, mem) class(shannonEntropyClerk), intent(inout) :: self class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem - integer(shortInt) :: i, j, cc, idx - real(defReal) :: totWgt, one_log2 + integer(shortInt) :: i, idx + real(defReal) :: totWgt, prob if (self % currentCycle < self % maxCycles) then - self % currentCycle = self % currentCycle + 1 - cc = self % currentCycle + totWgt = end % popWeight() + call mem % score(prob, self % getMemAddress()) ! Loop through population, scoring probabilities - do i = 1,end % popSize() - associate( state => end % get(i) ) + do i = 1, end % popSize() + + associate(state => end % get(i)) idx = self % map % map(state) - if( idx > 0) self % prob(idx) = self % prob(idx) + state % wgt + if (idx > 0) then + prob = state % wgt + call mem % score(prob, self % getMemAddress() + idx) + end if end associate + end do - totWgt = end % popWeight() + end if + + end subroutine reportCycleEnd + + !! + !! Close cycle + !! + subroutine closeCycle(self, end, mem) + class(shannonEntropyClerk), intent(inout) :: self + class(particleDungeon), intent(in) :: end + type(scoreMemory), intent(inout) :: mem + integer(shortInt) :: i + integer(longInt) :: ccIdx, idx + real(defReal) :: totWgt, one_log2, prob, val + + if (self % currentCycle < self % maxCycles) then + + self % currentCycle = self % currentCycle + 1 + ccIdx = self % getMemAddress() + self % N + self % currentCycle + + totWgt = mem % getScore(self % getMemAddress()) one_log2 = ONE/log(TWO) ! Loop through bins, summing entropy - do j = 1,self % N - self % prob(j) = self % prob(j)/totWgt - if ((self % prob(j) > ZERO) .AND. (self % prob(j) < ONE)) then - self % value(cc) = self % value(cc) - self % prob(j) * log(self % prob(j)) * one_log2 + do i = 1, self % N + idx = i + prob = mem % getScore(self % getMemAddress() + idx) + + if ((prob > ZERO) .and. (prob < ONE)) then + prob = prob / totWgt + val = val + prob * log(prob) * one_log2 end if - self % prob(j) = ZERO + end do + call mem % accumulate(val, ccIdx) + end if - end subroutine reportCycleEnd + end subroutine closeCycle !! !! Display convergance progress on the console @@ -174,10 +208,12 @@ end subroutine display !! 
subroutine print(self, outFile, mem) class(shannonEntropyClerk), intent(in) :: self - class(outputFile), intent(inout) :: outFile - type(scoreMemory), intent(in) :: mem - integer(shortInt) :: i - character(nameLen) :: name + class(outputFile), intent(inout) :: outFile + type(scoreMemory), intent(in) :: mem + integer(shortInt) :: i + character(nameLen) :: name + integer(longInt) :: ccIdx + real(defReal) :: val ! Begin block call outFile % startBlock(self % getName()) @@ -187,9 +223,12 @@ subroutine print(self, outFile, mem) call outFile % startArray(name, [self % maxCycles]) - do i = 1,self % maxCycles + do i = 1, self % maxCycles + ccIdx = self % getMemAddress() + self % N + i + call mem % getResult(val, ccIdx, samples = 1) call outFile % addValue(self % value(i)) end do + call outFile % endArray() call outFile % endBlock() @@ -203,8 +242,6 @@ elemental subroutine kill(self) class(shannonEntropyClerk), intent(inout) :: self if (allocated(self % map)) deallocate(self % map) - if (allocated(self % prob)) deallocate(self % prob) - if (allocated(self % value)) deallocate(self % value) self % N = 0 self % currentCycle = 0 self % maxCycles = 0 diff --git a/Tallies/TallyClerks/simpleFMClerk_class.f90 b/Tallies/TallyClerks/simpleFMClerk_class.f90 index e52aeb57b..4f9852def 100644 --- a/Tallies/TallyClerks/simpleFMClerk_class.f90 +++ b/Tallies/TallyClerks/simpleFMClerk_class.f90 @@ -61,10 +61,10 @@ module simpleFMClerk_class type, public, extends(tallyClerk) :: simpleFMClerk private !! Map defining the discretisation - class(tallyMap), allocatable :: map - type(macroResponse) :: resp - real(defReal),dimension(:),allocatable :: startWgt - integer(shortInt) :: N = 0 !! Number of bins + class(tallyMap), allocatable :: map + type(macroResponse) :: resp + integer(shortInt) :: N = 0 !! Number of bins + ! Settings logical(defBool) :: handleVirtual = .true. @@ -77,7 +77,7 @@ module simpleFMClerk_class ! File reports and check status -> run-time procedures procedure :: reportCycleStart procedure :: reportInColl - procedure :: reportCycleEnd + procedure :: closeCycle ! Overwrite default run-time result procedure procedure :: getResult @@ -99,8 +99,8 @@ module simpleFMClerk_class !! dim3 -> 1 is values; 2 is STDs !! type,public, extends( tallyResult) :: FMresult - integer(shortInt) :: N = 0 ! Size of FM - real(defReal), dimension(:,:,:),allocatable :: FM ! FM proper + integer(shortInt) :: N = 0 ! Size of FM + real(defReal), dimension(:,:,:), allocatable :: FM ! FM proper end type FMResult contains @@ -124,9 +124,6 @@ subroutine init(self, dict, name) ! Read size of the map self % N = self % map % bins(0) - ! Allocate space for starting weights - allocate(self % startWgt(self % N)) - ! Initialise response call self % resp % build(macroNuFission) @@ -144,7 +141,7 @@ function validReports(self) result(validCodes) class(simpleFMClerk),intent(in) :: self integer(shortInt),dimension(:),allocatable :: validCodes - validCodes = [inColl_CODE, cycleStart_Code ,cycleEnd_Code] + validCodes = [inColl_CODE, cycleStart_CODE, closeCycle_CODE] end function validReports @@ -157,13 +154,14 @@ elemental function getSize(self) result(S) class(simpleFMClerk), intent(in) :: self integer(shortInt) :: S - S = self % N * self % N + S = self % N * (self % N + 1) end function getSize !! !! Process start of the cycle - !! Calculate starting weights in each bin + !! Calculate starting weights in each bin and store them at memory location: + !! self % getMemAddress() : self % getMemAddress() + N - 1 !! !! 
See tallyClerk_inter for details !! @@ -173,14 +171,15 @@ subroutine reportCycleStart(self, start, mem) type(scoreMemory), intent(inout) :: mem integer(shortInt) :: idx, i - self % startWgt = ZERO - ! Loop through a population and calculate starting weight in each bin - do i = 1,start % popSize() + do i = 1, start % popSize() associate (state => start % get(i)) + idx = self % map % map(state) - if (idx > 0) self % startWgt(idx) = self % startWgt(idx) + state % wgt + if (idx == 0) return + call mem % score(state % wgt, self % getMemAddress() - 1 + idx) + end associate end do @@ -190,6 +189,9 @@ end subroutine reportCycleStart !! !! Process incoming collision report !! + !! Calculate matrix elements and store them at memory location: + !! self % getMemAddress() + N : self % getMemAddress() + N*(1 + N) + !! !! See tallyClerk_inter for details !! subroutine reportInColl(self, p, xsData, mem, virtual) @@ -241,7 +243,8 @@ subroutine reportInColl(self, p, xsData, mem, virtual) score = self % resp % get(p, xsData) * flux ! Score element of the matrix - addr = self % getMemAddress() + (sIdx - 1) * self % N + cIdx - 1 + ! Note that matrix memory location starts from memAddress + N + addr = self % getMemAddress() + sIdx * self % N + cIdx - 1 call mem % score(score, addr) end subroutine reportInColl @@ -251,7 +254,7 @@ end subroutine reportInColl !! !! See tallyClerk_inter for details !! - subroutine reportCycleEnd(self, end, mem) + subroutine closeCycle(self, end, mem) class(simpleFMClerk), intent(inout) :: self class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem @@ -262,23 +265,26 @@ subroutine reportCycleEnd(self, end, mem) if (mem % lastCycle()) then ! Set address to the start of Fission Matrix ! Decrease by 1 to get correct address on the first iteration of the loop - addrFM = self % getMemAddress() - 1 + addrFM = self % getMemAddress() + self % N - 1 ! Normalise and accumulate estimates - do i = 1,self % N + do i = 1, self % N + ! Calculate normalisation factor - normFactor = self % startWgt(i) + normFactor = mem % getScore(self % getMemAddress() - 1 + i) if (normFactor /= ZERO) normFactor = ONE / normFactor - do j = 1,self % N + do j = 1, self % N ! Normalise FM column addrFM = addrFM + 1 call mem % closeBin(normFactor, addrFM) end do + end do + end if - end subroutine reportCycleEnd + end subroutine closeCycle !! !! Return result from the clerk for interaction with Physics Package @@ -300,14 +306,15 @@ pure subroutine getResult(self, res, mem) ! Do not deallocate if already allocated to FMresult ! Its not to nice -> clean up if (allocated(res)) then + select type(res) class is (FMresult) ! Do nothing - - class default ! Reallocate + class default + ! Reallocate deallocate(res) allocate( FMresult :: res) - end select + end select else allocate( FMresult :: res) @@ -320,10 +327,12 @@ pure subroutine getResult(self, res, mem) ! Check size and reallocate space if needed ! This is horrible. Hove no time to polish. Blame me (MAK) if (allocated(res % FM)) then + if (any(shape(res % FM) /= [self % N, self % N, 2])) then deallocate(res % FM) allocate(res % FM(self % N, self % N, 2)) end if + else allocate(res % FM(self % N, self % N, 2)) end if @@ -332,7 +341,7 @@ pure subroutine getResult(self, res, mem) res % N = self % N ! Load entries - addr = self % getMemAddress() - 1 + addr = self % getMemAddress() + self % N - 1 do i = 1,self % N do j = 1, self % N addr = addr + 1 @@ -381,7 +390,7 @@ subroutine print(self, outFile, mem) ! 
Print fission matrix
 name = 'FM'
- addr = self % getMemAddress() - 1
+ addr = self % getMemAddress() + self % N - 1

 call outFile % startArray(name, [self % N, self % N])
@@ -408,7 +417,6 @@ elemental subroutine kill(self)
 call kill_super(self)

 if (allocated(self % map)) deallocate(self % map)
- if (allocated(self % startWgt)) deallocate(self % startWgt)

 self % N = 0
 self % handleVirtual = .true.
diff --git a/Tallies/TallyClerks/tallyClerkSlot_class.f90 b/Tallies/TallyClerks/tallyClerkSlot_class.f90
index 860ca71b2..d5d50392d 100644
--- a/Tallies/TallyClerks/tallyClerkSlot_class.f90
+++ b/Tallies/TallyClerks/tallyClerkSlot_class.f90
@@ -49,6 +49,7 @@ module tallyClerkSlot_class
 procedure :: reportHist
 procedure :: reportCycleStart
 procedure :: reportCycleEnd
+ procedure :: closeCycle
 procedure :: isConverged

 ! Output procedures
@@ -294,6 +295,21 @@ subroutine reportCycleEnd(self, end, mem)

 end subroutine reportCycleEnd

+ !!
+ !! Close cycle
+ !!
+ !! See tallyClerk_inter for details
+ !!
+ subroutine closeCycle(self, end, mem)
+ class(tallyClerkSlot), intent(inout) :: self
+ class(particleDungeon), intent(in) :: end
+ type(scoreMemory), intent(inout) :: mem
+
+ ! Pass call to instance in the slot
+ call self % slot % closeCycle(end, mem)
+
+ end subroutine closeCycle
+
 !!
 !! Perform convergance check in the Clerk
 !!
diff --git a/Tallies/TallyClerks/tallyClerk_inter.f90 b/Tallies/TallyClerks/tallyClerk_inter.f90
index 9eea9eba7..29cad3a65 100644
--- a/Tallies/TallyClerks/tallyClerk_inter.f90
+++ b/Tallies/TallyClerks/tallyClerk_inter.f90
@@ -60,7 +60,8 @@ module tallyClerk_inter
 !! reportSpawn -> Process particle generation report
 !! reportHist -> Process history report
 !! reportCycleStart -> Process beginning of a cycle report
- !! reportCycleEnd -> Process end of a cycle report (e.g. Calculate functions of scores like k-eff)
+ !! reportCycleEnd -> Process end of a cycle report
+ !! closeCycle -> Performs operations, e.g., calculate functions of scores like k-eff
 !! isConverged -> Return .true. if convergence criterion has been reached
 !! display -> Display to the console current value of a Score
 !! print -> Print results to the output file
@@ -96,6 +97,7 @@ module tallyClerk_inter
 procedure :: reportHist
 procedure :: reportCycleStart
 procedure :: reportCycleEnd
+ procedure :: closeCycle
 procedure :: isConverged

 ! Output procedures
@@ -123,6 +125,7 @@ module tallyClerk_inter
 !! hist_CODE
 !! cycleStart_CODE
 !! cycleEnd_CODE
+ !! closeCycle_CODE
 !!
 !! Errors:
 !! None
@@ -417,6 +420,28 @@ subroutine reportCycleEnd(self, end, mem)

 end subroutine reportCycleEnd

+ !!
+ !! Closes the cycle
+ !!
+ !! See tallyAdmin_class for implicit assumptions about the report.
+ !!
+ !! Args:
+ !! end [in] -> Particle Dungeon with particles for NEXT cycle (before any normalisation)
+ !! mem [inout] -> Score Memory
+ !!
+ !! Errors:
+ !! Depends on specific Clerk
+ !!
+ subroutine closeCycle(self, end, mem)
+ class(tallyClerk), intent(inout) :: self
+ class(particleDungeon), intent(in) :: end
+ type(scoreMemory), intent(inout) :: mem
+ character(100),parameter :: Here = 'closeCycle (tallyClerk_inter.f90)'
+
+ call fatalError(Here,'Report was sent to an instance that does not support it.')
+
+ end subroutine closeCycle
+
 !!
 !! Perform convergence check in the Clerk
 !!
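The closeCycle interface above also formalises the bin-addressing convention used throughout this patch: each clerk owns a contiguous block of score-memory bins starting at getMemAddress(), getSize() must report the block length so that tallyAdmin can lay the clerks out back to back, and named integer offsets select bins within the block. This is why the keffAnalogClerk tests now read the k-eff estimate from address 3_longInt: that clerk is placed at address 1 and K_EST = 2. A condensed sketch of the pattern, reusing the keffAnalogClerk offsets defined earlier in this patch:

    ! Scoring side, executed by every thread/process before reduction
    call mem % score(start % popWeight(), self % getMemAddress() + START_POP)

    ! Closing side, executed on the reduced bins after reduceBins()
    k_eff = mem % getScore(self % getMemAddress() + END_POP) &
            / mem % getScore(self % getMemAddress() + START_POP) * k_norm
    call mem % accumulate(k_eff, self % getMemAddress() + K_EST)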
diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index 4e98be7dd..ffb5e2951 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -382,24 +382,28 @@ subroutine reduceBins(self) integer(longInt) :: i character(100),parameter :: Here = 'reduceBins (scoreMemory_class.f90)' - !$omp parallel do - do i = 1, self % N - self % bins(i, BIN) = sum(self % parallelBins(i,:)) - self % parallelBins(i,:) = ZERO - end do - !$omp end parallel do + if (self % lastCycle()) then + + !$omp parallel do + do i = 1, self % N + self % bins(i, BIN) = sum(self % parallelBins(i,:)) + self % parallelBins(i,:) = ZERO + end do + !$omp end parallel do - ! Reduce across processes - ! We use the parallelBins array as a temporary storage + ! Reduce across processes + ! We use the parallelBins array as a temporary storage #ifdef MPI - ! Since the number of bins is limited by 64bit signed integer and the - ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need - ! to split the reduction operation into chunks - if (self % reduced) then - call reduceArray(self % bins(:,BIN), self % parallelBins(:,1)) - end if + ! Since the number of bins is limited by 64bit signed integer and the + ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need + ! to split the reduction operation into chunks + if (self % reduced) then + call reduceArray(self % bins(:,BIN), self % parallelBins(:,1)) + end if #endif + end if + end subroutine reduceBins !! @@ -507,13 +511,13 @@ end subroutine reduceArray !! Returns 0 if index is invalid !! elemental subroutine getResult_withSTD(self, mean, STD, idx, samples) - class(scoreMemory), intent(in) :: self - real(defReal), intent(out) :: mean - real(defReal),intent(out) :: STD - integer(longInt), intent(in) :: idx - integer(shortInt), intent(in),optional :: samples - integer(shortInt) :: N - real(defReal) :: inv_N, inv_Nm1 + class(scoreMemory), intent(in) :: self + real(defReal), intent(out) :: mean + real(defReal),intent(out) :: STD + integer(longInt), intent(in) :: idx + integer(shortInt), intent(in), optional :: samples + integer(shortInt) :: N + real(defReal) :: inv_N, inv_Nm1 !! Verify index. Return 0 if not present if (idx < 0_longInt .or. idx > self % N) then @@ -550,14 +554,14 @@ end subroutine getResult_withSTD !! Returns 0 if index is invalid !! elemental subroutine getResult_withoutSTD(self, mean, idx, samples) - class(scoreMemory), intent(in) :: self - real(defReal), intent(out) :: mean - integer(longInt), intent(in) :: idx - integer(shortInt), intent(in),optional :: samples - integer(shortInt) :: N + class(scoreMemory), intent(in) :: self + real(defReal), intent(out) :: mean + integer(longInt), intent(in) :: idx + integer(shortInt), intent(in), optional :: samples + integer(shortInt) :: N !! Verify index. Return 0 if not present - if( idx < 0_longInt .or. idx > self % N) then + if (idx < 0_longInt .or. idx > self % N) then mean = ZERO return end if diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90 index bbeb232e9..c244b228a 100644 --- a/Tallies/tallyAdmin_class.f90 +++ b/Tallies/tallyAdmin_class.f90 @@ -56,6 +56,7 @@ module tallyAdmin_class !! histClerks -> List of indices of all clerks that require histReport !! cycleStartClerks -> List of indices of all clerks that require cycleStartReport !! cycleEndClerks -> List of indices of all clerks that require cycleEndReport + !! closeCycleClerks -> List of indices of all clerks that require closeCycle !! 
displayList -> List of indices of all clerks that are registered for display !! mem -> Score Memory for all defined clerks !! mpiSync -> @@ -119,6 +120,7 @@ module tallyAdmin_class type(dynIntArray) :: histClerks type(dynIntArray) :: cycleStartClerks type(dynIntArray) :: cycleEndClerks + type(dynIntArray) :: closeCycleClerks ! List of clerks to display type(dynIntArray) :: displayList @@ -200,7 +202,7 @@ subroutine init(self,dict) ! Register all clerks to recive their reports do i = 1,size(self % tallyClerks) - associate( reports => self % tallyClerks(i) % validReports() ) + associate(reports => self % tallyClerks(i) % validReports()) do j = 1,size(reports) call self % addToReports(reports(j), i) end do @@ -227,7 +229,7 @@ subroutine init(self,dict) ! Initialise score memory ! Calculate required size. - memSize = sum( self % tallyClerks % getSize() ) + 1 + memSize = sum(self % tallyClerks % getSize()) + 1 call self % mem % init(memSize, 1, batchSize = cyclesPerBatch, reduced = self % mpiSync) ! Assign memory locations to the clerks @@ -286,6 +288,7 @@ recursive subroutine kill(self) call self % histClerks % kill() call self % cycleStartClerks % kill() call self % cycleEndClerks % kill() + call self % closeCycleClerks % kill() ! Kill score memory call self % mem % kill() @@ -307,7 +310,7 @@ recursive subroutine push(self, atch) class(tallyAdmin), intent(inout) :: self type(tallyAdmin), pointer, intent(in) :: atch - if(associated(self % atch)) then + if (associated(self % atch)) then call self % atch % push(atch) else @@ -331,10 +334,10 @@ recursive subroutine pop(self, atch) class(tallyAdmin), intent(inout) :: self type(tallyAdmin), pointer, intent(out) :: atch - if(.not. associated(self % atch)) then ! Single element list + if (.not. associated(self % atch)) then ! Single element list atch => null() - elseif( associated(self % atch % atch)) then ! Go down the list + elseif (associated(self % atch % atch)) then ! Go down the list call self % atch % pop(atch) else ! Remove last element @@ -361,16 +364,17 @@ recursive function getEnd(self) result(atch) class(tallyAdmin), intent(in) :: self type(tallyAdmin),pointer :: atch - if(.not. associated(self % atch)) then + if (.not. associated(self % atch)) then atch => null() - elseif( associated(self % atch % atch)) then + elseif (associated(self % atch % atch)) then atch => self % atch % getEnd() else atch => self % atch end if + end function getEnd !! @@ -749,15 +753,24 @@ recursive subroutine reportCycleEnd(self, end) call reportCycleEnd(self % atch, end) end if + ! Go through all clerks that request the reportCycleEnd + !$omp parallel do + do i = 1,self % cycleEndClerks % getSize() + idx = self % cycleEndClerks % get(i) + call self % tallyClerks(idx) % reportCycleEnd(end, self % mem) + end do + !$omp end parallel do + ! Reduce the scores across the threads and processes call self % mem % reduceBins() if (isMPIMaster() .or. .not. self % mpiSync) then - ! Go through all clerks that request the report + + ! 
Go through all clerks that request closeCycle !$omp parallel do - do i = 1,self % cycleEndClerks % getSize() - idx = self % cycleEndClerks % get(i) - call self % tallyClerks(idx) % reportCycleEnd(end, self % mem) + do i = 1, self % closeCycleClerks % getSize() + idx = self % closeCycleClerks % get(i) + call self % tallyClerks(idx) % closeCycle(end, self % mem) end do !$omp end parallel do @@ -878,6 +891,9 @@ subroutine addToReports(self, reportCode, idx) case(cycleEnd_CODE) call self % cycleEndClerks % add(idx) + case(closeCycle_CODE) + call self % closeCycleClerks % add(idx) + case default call fatalError(Here, 'Undefined reportCode') end select diff --git a/Tallies/tallyCodes.f90 b/Tallies/tallyCodes.f90 index 4a05d37aa..280acfd17 100644 --- a/Tallies/tallyCodes.f90 +++ b/Tallies/tallyCodes.f90 @@ -17,7 +17,8 @@ module tallyCodes spawn_CODE = 1004 ,& hist_CODE = 1005 ,& cycleStart_CODE = 1006 ,& - cycleEnd_CODE = 1007 + cycleEnd_CODE = 1007 ,& + closeCycle_CODE = 1008 ! List of codes for fiffrent particle fates integer(shortInt),parameter,public :: abs_FATE = 5000 ,& From b4fc455fff9319106b8ce8193fd02ec32183b84a Mon Sep 17 00:00:00 2001 From: "V. Raffuzzi" Date: Wed, 18 Sep 2024 18:36:06 +0100 Subject: [PATCH 24/27] Fix shannon entropy clerk with MPI --- .../TallyClerks/shannonEntropyClerk_class.f90 | 130 ++++++++++-------- 1 file changed, 76 insertions(+), 54 deletions(-) diff --git a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 index 9c92fef08..a937f0d39 100644 --- a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 +++ b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 @@ -17,6 +17,11 @@ module shannonEntropyClerk_class use tallyMap_inter, only : tallyMap use tallyMapFactory_func, only : new_tallyMap + use mpi_func, only : isMPIMaster +#ifdef MPI + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK +#endif + implicit none private @@ -29,8 +34,9 @@ module shannonEntropyClerk_class !! !! Scores Shannon entropy for a user-specified number of cycles !! - !! NOTE: when using multiple MPI processes, this will give wrong results unless - !! mpiSync is set to 1 for the tallyAdmin that contains this clerk + !! NOTE: when using MPI with multiple processes, the scores from all processes are + !! collected (brute-force) in the master process. Only the master process + !! results are correct and accessible !! !! Sample dictionary input: !! @@ -43,12 +49,10 @@ module shannonEntropyClerk_class type, public, extends(tallyClerk) :: shannonEntropyClerk private !! Map defining the discretisation - class(tallyMap), allocatable :: map - real(defReal),dimension(:),allocatable :: prob !! probability of being in a given bin - real(defReal),dimension(:),allocatable, public :: value !! cycle-wise value of entropy - integer(shortInt) :: N = 0 !! Number of bins - integer(shortInt) :: maxCycles = 0 !! Number of tally cycles - integer(shortInt) :: currentCycle = 0 !! track current cycle + class(tallyMap), allocatable :: map + integer(shortInt) :: N = 0 !! Number of bins + integer(shortInt) :: maxCycles = 0 !! Number of tally cycles + integer(shortInt) :: currentCycle = 0 !! track current cycle contains ! Procedures used during build @@ -88,17 +92,9 @@ subroutine init(self, dict, name) ! Read number of cycles for which to track entropy call dict % get(self % maxCycles, 'cycles') - ! Allocate space for storing entropy - allocate(self % value(self % maxCycles)) - self % value = ZERO - ! 
Read size of the map self % N = self % map % bins(0) - ! Allocate space for storing probabilities - allocate(self % prob(self % N)) - self % prob = ZERO - end subroutine init !! @@ -119,7 +115,7 @@ elemental function getSize(self) result(S) class(shannonEntropyClerk), intent(in) :: self integer(shortInt) :: S - S = self % N + self % maxCycles + 1 + S = self % N + self % maxCycles end function getSize @@ -131,26 +127,47 @@ subroutine reportCycleEnd(self, end, mem) class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem integer(shortInt) :: i, idx - real(defReal) :: totWgt, prob + real(defReal), dimension(self % N) :: prob, bufferArray + real(defReal) :: totWgt, buffer +#ifdef MPI + integer(shortInt) :: error +#endif - if (self % currentCycle < self % maxCycles) then + self % currentCycle = self % currentCycle + 1 - totWgt = end % popWeight() - call mem % score(prob, self % getMemAddress()) + if (self % currentCycle <= self % maxCycles) then + + prob = ZERO ! Loop through population, scoring probabilities do i = 1, end % popSize() associate(state => end % get(i)) idx = self % map % map(state) - if (idx > 0) then - prob = state % wgt - call mem % score(prob, self % getMemAddress() + idx) - end if + if (idx > 0) prob(idx) = prob(idx) + state % wgt end associate end do + totWgt = end % popWeight() + + buffer = totWgt + bufferArray = prob + +#ifdef MPI + call mpi_reduce(totWgt, buffer, 1, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(prob, bufferArray, self % N, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) +#endif + + if (isMPIMaster()) then + + prob = bufferArray / buffer + do i = 1, self % N + call mem % score(prob(i), self % getMemAddress() - 1 + i) + end do + + end if + end if end subroutine reportCycleEnd @@ -163,30 +180,31 @@ subroutine closeCycle(self, end, mem) class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem integer(shortInt) :: i - integer(longInt) :: ccIdx, idx - real(defReal) :: totWgt, one_log2, prob, val + integer(longInt) :: ccIdx + real(defReal) :: one_log2, prob, val - if (self % currentCycle < self % maxCycles) then + if (isMPIMaster()) then - self % currentCycle = self % currentCycle + 1 - ccIdx = self % getMemAddress() + self % N + self % currentCycle + if (self % currentCycle <= self % maxCycles) then - totWgt = mem % getScore(self % getMemAddress()) - one_log2 = ONE/log(TWO) + ccIdx = self % getMemAddress() + self % N - 1 + self % currentCycle - ! Loop through bins, summing entropy - do i = 1, self % N - idx = i - prob = mem % getScore(self % getMemAddress() + idx) + val = ZERO + one_log2 = ONE/log(TWO) - if ((prob > ZERO) .and. (prob < ONE)) then - prob = prob / totWgt - val = val + prob * log(prob) * one_log2 - end if + ! Loop through bins, summing entropy + do i = 1, self % N + prob = mem % getScore(self % getMemAddress() - 1 + i) - end do + if ((prob > ZERO) .and. (prob < ONE)) then + val = val - prob * log(prob) * one_log2 + end if + + end do - call mem % accumulate(val, ccIdx) + call mem % accumulate(val, ccIdx) + + end if end if @@ -215,23 +233,27 @@ subroutine print(self, outFile, mem) integer(longInt) :: ccIdx real(defReal) :: val - ! Begin block - call outFile % startBlock(self % getName()) + if (isMPIMaster()) then + + ! Begin block + call outFile % startBlock(self % getName()) - ! Print entropy - name = 'shannonEntropy' + ! 
Print entropy + name = 'shannonEntropy' - call outFile % startArray(name, [self % maxCycles]) + call outFile % startArray(name, [self % maxCycles]) - do i = 1, self % maxCycles - ccIdx = self % getMemAddress() + self % N + i - call mem % getResult(val, ccIdx, samples = 1) - call outFile % addValue(self % value(i)) - end do + do i = 1, self % maxCycles + ccIdx = self % getMemAddress() + self % N - 1 + i + call mem % getResult(val, ccIdx, samples = 1) + call outFile % addValue(val) + end do - call outFile % endArray() + call outFile % endArray() - call outFile % endBlock() + call outFile % endBlock() + + end if end subroutine print From 2b0504456e933f1c6b8850751595052bea6f7d3b Mon Sep 17 00:00:00 2001 From: "V. Raffuzzi" Date: Tue, 1 Oct 2024 15:31:11 +0100 Subject: [PATCH 25/27] Update MPI branch to main latest and allow tests to run with MPI --- CMakeLists.txt | 13 +- DataStructures/Tests/heapQueue_test.f90 | 2 +- .../aceDatabase/aceNeutronDatabase_class.f90 | 2 +- .../Tests/particleDungeon_test.f90 | 21 +- ParticleObjects/particleDungeon_class.f90 | 19 +- PhysicsPackages/eigenPhysicsPackage_class.f90 | 12 +- .../fixedSourcePhysicsPackage_class.f90 | 4 +- SharedModules/mpi_func.f90 | 68 +- .../TallyClerks/centreOfMassClerk_class.f90 | 2 +- .../TallyClerks/shannonEntropyClerk_class.f90 | 6 +- Tallies/TallyClerks/trackClerk_class.f90 | 586 ++++---- Tallies/Tests/scoreMemory_test.f90 | 602 ++++----- Tallies/scoreMemory_class.f90 | 1196 ++++++++--------- docs/{User Manual.rst => Input Manual.rst} | 6 +- docs/Installation.rst | 65 +- docs/Running.rst | 37 + docs/Tutorials/Tutorial_1.rst | 4 +- 17 files changed, 1365 insertions(+), 1280 deletions(-) rename docs/{User Manual.rst => Input Manual.rst} (99%) create mode 100644 docs/Running.rst diff --git a/CMakeLists.txt b/CMakeLists.txt index e6cf3e0b2..13c1f3d28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,9 +145,9 @@ get_property(SRCS GLOBAL PROPERTY SRCS_LIST) # Compile library add_library(scone STATIC ${SRCS}) target_compile_options(scone PRIVATE ${scone_extra_flags} ) -target_link_libraries(scone ${LAPACK_LIBRARIES} ) +target_link_libraries(scone PUBLIC ${LAPACK_LIBRARIES} ) if(MPI) - target_link_libraries(scone MPI::MPI_Fortran) + target_link_libraries(scone PUBLIC MPI::MPI_Fortran) endif() if(LTO) @@ -158,10 +158,6 @@ endif() # COMPILE SOLVERS add_executable(scone.out ./Apps/scone.f90 ) target_link_libraries(scone.out scone ) -if(MPI) - target_link_libraries(scone.out MPI::MPI_Fortran) -endif() - #################################################################################################### # COMPILE UNIT TESTS @@ -178,7 +174,8 @@ if(BUILD_TESTS) add_pfunit_ctest(unitTests TEST_SOURCES ${UNIT_TESTS_RELATIVE} - LINK_LIBRARIES scone ${LAPACK_LIBRARIES} + LINK_LIBRARIES scone + MAX_PES 1 ) # pFUnit may have a bug which causes a unused variable `class(Test), allocatable :: t` be @@ -199,7 +196,7 @@ if(BUILD_TESTS) add_pfunit_ctest(integrationTests TEST_SOURCES ${INTEGRATION_TESTS_RELATIVE} - LINK_LIBRARIES scone ${LAPACK_LIBRARIES} + LINK_LIBRARIES scone WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) diff --git a/DataStructures/Tests/heapQueue_test.f90 b/DataStructures/Tests/heapQueue_test.f90 index 47abda7f6..76191cedf 100644 --- a/DataStructures/Tests/heapQueue_test.f90 +++ b/DataStructures/Tests/heapQueue_test.f90 @@ -1,7 +1,7 @@ module heapQueue_test use numPrecision use heapQueue_class, only: heapQueue - use pFUnit_mod + use funit implicit none diff --git a/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 
b/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 index 041c07691..b71929835 100644 --- a/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 +++ b/NuclearData/ceNeutronData/aceDatabase/aceNeutronDatabase_class.f90 @@ -880,7 +880,7 @@ subroutine init(self, dict, ptr, silent ) if(loud) then call statusMsg("Building: "// trim(name)// " with index: " //numToChar(nucIdx)) if (idx /= 0 .and. idx2 == 0) & - call statusMsg("including S(alpha,beta) tables with file: " //trim(name_file)) + call statusMsg("including S(alpha,beta) tables with file: " //trim(name_file1)) if (idx /= 0 .and. idx2 /= 0) & call statusMsg("including S(alpha,beta) tables with files: " //trim(name_file1)//' '//trim(name_file2)) end if diff --git a/ParticleObjects/Tests/particleDungeon_test.f90 b/ParticleObjects/Tests/particleDungeon_test.f90 index 22f544b9a..bea00ad86 100644 --- a/ParticleObjects/Tests/particleDungeon_test.f90 +++ b/ParticleObjects/Tests/particleDungeon_test.f90 @@ -1,8 +1,10 @@ module particleDungeon_test use numPrecision + use errors_mod, only : fatalError use RNG_class, only : RNG use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon + use mpi_func, only : mpiInitTypes, MPI_COMM_WORLD use funit implicit none @@ -194,6 +196,7 @@ subroutine testWeightNorm() ! Clean call dungeon % kill() + end subroutine testWeightNorm !! @@ -206,9 +209,17 @@ subroutine testNormPopDown() type(particleDungeon) :: dungeon type(particle) :: p type(RNG) :: pRNG - integer(shortInt) :: i + integer(shortInt) :: i, worldSize, ierr real(defReal), parameter :: TOL = 1.0E-9 + character(100),parameter :: Here = 'testNormPopDown (particleDungeon_test.f90)' + + call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) + + if (worldSize > 1) & + call fatalError(Here, 'This test cannot be run with multiple MPI processes') + ! Initialise MPI types needed for this procedure + call mpiInitTypes() ! Initialise call dungeon % init(10) @@ -245,8 +256,14 @@ subroutine testNormPopUp() type(particleDungeon) :: dungeon type(particle) :: p type(RNG) :: pRNG - integer(shortInt) :: i + integer(shortInt) :: i, worldSize, ierr real(defReal), parameter :: TOL = 1.0E-9 + character(100),parameter :: Here = 'testNormPopUp (particleDungeon_test.f90)' + + call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) + + if (worldSize > 1) & + call fatalError(Here, 'This test cannot be run with multiple MPI processes') ! 
Initialise call dungeon % init(20) diff --git a/ParticleObjects/particleDungeon_class.f90 b/ParticleObjects/particleDungeon_class.f90 index 9b37b245b..e14c0babf 100644 --- a/ParticleObjects/particleDungeon_class.f90 +++ b/ParticleObjects/particleDungeon_class.f90 @@ -1,7 +1,8 @@ module particleDungeon_class use numPrecision - use genericProcedures, only : fatalError, numToChar, swap + use errors_mod, only : fatalError + use genericProcedures, only : numToChar, swap use particle_class, only : particle, particleState use RNG_class, only : RNG use heapQueue_class, only : heapQueue @@ -9,8 +10,8 @@ module particleDungeon_class use mpi_func, only : isMPIMaster, getMPIWorldSize, getMPIRank, getOffset #ifdef MPI use mpi_func, only : mpi_gather, mpi_allgather, mpi_send, mpi_recv, & - mpi_Bcast, MPI_COMM_WORLD, MASTER_RANK, MPI_DEFREAL, & - MPI_SHORTINT, MPI_LONGINT, MPI_PARTICLE_STATE, & + mpi_Bcast, MPI_COMM_WORLD, MASTER_RANK, MPI_DOUBLE, & + MPI_INT, MPI_LONG_LONG, MPI_PARTICLE_STATE, & MPI_STATUS_IGNORE, particleStateDummy #endif @@ -452,7 +453,7 @@ subroutine normSize(self, totPop, rand) #ifdef MPI ! Get the population sizes of all ranks into the array popSizes in master branch - call mpi_gather(self % pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_gather(self % pop, 1, MPI_INT, popSizes, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD, error) #endif ! In the master process, calculate sampling threshold for the whole population @@ -504,9 +505,9 @@ subroutine normSize(self, totPop, rand) ! Broadcast threshold, excess and random number seeds to all processes #ifdef MPI - call MPI_Bcast(threshold, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD) - call MPI_Bcast(excess, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD) - call MPI_Bcast(seeds, nRanks, MPI_LONGINT, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(threshold, 1, MPI_DOUBLE, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(excess, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(seeds, nRanks, MPI_LONG_LONG, MASTER_RANK, MPI_COMM_WORLD) #endif ! Get local process rank and initialise local rng with the correct seed @@ -581,7 +582,7 @@ subroutine normSize(self, totPop, rand) #ifdef MPI ! Get the updated population numbers from all processes - call mpi_allgather(self % pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, MPI_COMM_WORLD, error) + call mpi_allgather(self % pop, 1, MPI_INT, popSizes, 1, MPI_INT, MPI_COMM_WORLD, error) #endif ! Check that normalisation worked @@ -615,7 +616,7 @@ subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) ! Communicates the offset from all processes to all processes allocate(rankOffsets(nRanks)) - call mpi_allgather(mpiOffset, 1, MPI_SHORTINT, rankOffsets, 1, MPI_SHORTINT, MPI_COMM_WORLD, error) + call mpi_allgather(mpiOffset, 1, MPI_INT, rankOffsets, 1, MPI_INT, MPI_COMM_WORLD, error) ! 
Calculate actual and target cumulative number of sites in the processes before offset(1) = sum(popSizes(1 : rank)) diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index 0df131bb6..20ce94ce7 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -8,8 +8,8 @@ module eigenPhysicsPackage_class printSectionEnd, printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, getOffset, getMPIRank #ifdef MPI - use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD, & - MPI_DEFREAL, mpi_reduce, MPI_SUM + use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INT, MPI_COMM_WORLD, & + MPI_DOUBLE, mpi_reduce, MPI_SUM #endif use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary @@ -289,7 +289,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) #ifdef MPI ! Broadcast k_eff obtained in the master to all processes - call MPI_Bcast(k_new, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(k_new, 1, MPI_DOUBLE, MASTER_RANK, MPI_COMM_WORLD) #endif ! Load new k-eff estimate into next cycle dungeon @@ -309,9 +309,9 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) #ifdef MPI ! Print the population numbers referred to all processes to screen - call mpi_reduce(nStart, nTemp, 1, MPI_INTEGER, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(nStart, nTemp, 1, MPI_INT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) nStart = nTemp - call mpi_reduce(nEnd, nTemp, 1, MPI_INTEGER, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(nEnd, nTemp, 1, MPI_INT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) nEnd = nTemp #endif @@ -473,7 +473,7 @@ subroutine init(self, dict) ! Broadcast seed to all processes #ifdef MPI - call MPI_Bcast(seed_temp, 1, MPI_INTEGER, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(seed_temp, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD) #endif seed = seed_temp diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index 3cca40ce0..d7966d3a2 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -8,7 +8,7 @@ module fixedSourcePhysicsPackage_class printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, getOffset, getMPIRank #ifdef MPI - use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INTEGER, MPI_COMM_WORLD + use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INT, MPI_COMM_WORLD #endif use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary @@ -382,7 +382,7 @@ subroutine init(self, dict) ! Broadcast seed to all processes #ifdef MPI - call MPI_Bcast(seed_temp, 1, MPI_INTEGER, MASTER_RANK, MPI_COMM_WORLD) + call MPI_Bcast(seed_temp, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD) #endif seed = seed_temp diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90 index 7de45a336..da17759c7 100644 --- a/SharedModules/mpi_func.f90 +++ b/SharedModules/mpi_func.f90 @@ -34,9 +34,6 @@ module mpi_func !! Common MPI types #ifdef MPI - type(MPI_Datatype) :: MPI_DEFREAL - type(MPI_Datatype) :: MPI_SHORTINT - type(MPI_Datatype) :: MPI_LONGINT type(MPI_Datatype) :: MPI_PARTICLE_STATE #endif @@ -49,11 +46,7 @@ module mpi_func !! 
subroutine mpiInit() #ifdef MPI - integer(shortInt) :: ierr, stateSize - type(particleStateDummy) :: state - integer(kind = MPI_ADDRESS_KIND), dimension(:), allocatable :: displacements - integer(shortInt), dimension(:), allocatable :: blockLengths - type(MPI_Datatype), dimension(:), allocatable :: types + integer(shortInt) :: ierr call mpi_init(ierr) @@ -61,18 +54,36 @@ subroutine mpiInit() call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr) - ! Define MPI type for DEFREAL - call mpi_type_create_f90_real(precision(1.0_defReal), range(1.0_defReal), & - MPI_DEFREAL, ierr) - call mpi_type_commit(MPI_DEFREAL, ierr) + ! Defines types + call mpiInitTypes() +#endif - ! Define MPI type for SHORTINT - call mpi_type_create_f90_integer(range(1_shortInt), MPI_SHORTINT, ierr) - call mpi_type_commit(MPI_SHORTINT, ierr) + end subroutine mpiInit - ! Define MPI type for LONGINT - call mpi_type_create_f90_integer(range(1_longInt), MPI_LONGINT, ierr) - call mpi_type_commit(MPI_LONGINT, ierr) + !! + !! Finalise MPI environment + !! + !! Needs to be called at the end of calculation after all MPI calls + !! + subroutine mpiFinalise() +#ifdef MPI + integer(shortInt) :: ierr + + call mpi_finalize(ierr) + +#endif + end subroutine mpiFinalise + + !! + !! Initialise types used by the MPI environment + !! + subroutine mpiInitTypes() +#ifdef MPI + integer(shortInt) :: ierr, stateSize + type(particleStateDummy) :: state + integer(kind = MPI_ADDRESS_KIND), dimension(:), allocatable :: displacements + integer(shortInt), dimension(:), allocatable :: blockLengths + type(MPI_Datatype), dimension(:), allocatable :: types ! Define MPI type for particleState ! Note that particleState has stateSize = 13 attributes; if an attribute is @@ -82,8 +93,8 @@ subroutine mpiInit() ! Create arrays with dimension and type of each property of particleStateDummy blockLengths = (/1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/) - types = (/MPI_DEFREAL, MPI_DEFREAL, MPI_DEFREAL, MPI_DEFREAL, MPI_SHORTINT, MPI_LOGICAL, MPI_SHORTINT, & - MPI_DEFREAL, MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT/) + types = (/MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_LOGICAL, MPI_INT, & + MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT/) ! Create array of memory byte displacements call mpi_get_address(state % wgt, displacements(1), ierr) @@ -104,24 +115,9 @@ subroutine mpiInit() ! Define new type call mpi_type_create_struct(stateSize, blockLengths, displacements, types, MPI_PARTICLE_STATE, ierr) call mpi_type_commit(MPI_PARTICLE_STATE, ierr) - #endif - end subroutine mpiInit - - !! - !! Finalise MPI environment - !! - !! Needs to be called at the end of calculation after all MPI calls - !! - subroutine mpiFinalise() -#ifdef MPI - integer(shortInt) :: ierr - - call mpi_finalize(ierr) - -#endif - end subroutine mpiFinalise + end subroutine mpiInitTypes !! !! Get the share of work N for the current process diff --git a/Tallies/TallyClerks/centreOfMassClerk_class.f90 b/Tallies/TallyClerks/centreOfMassClerk_class.f90 index 0b8a3269f..e246c8dbf 100644 --- a/Tallies/TallyClerks/centreOfMassClerk_class.f90 +++ b/Tallies/TallyClerks/centreOfMassClerk_class.f90 @@ -27,7 +27,7 @@ module centreOfMassClerk_class !! Sample dictionary input: !! !! clerkName { - !! type comClerk; + !! type centreOfMassClerk; !! cycles 900; !! } !! 
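The mpiInitTypes routine introduced in SharedModules/mpi_func.f90 above follows the standard recipe for committing an MPI representation of a derived type: query the byte address of each component with mpi_get_address, convert the addresses to displacements relative to the first component, then call mpi_type_create_struct and mpi_type_commit. Below is a minimal, self-contained sketch of the same pattern for a hypothetical three-component payload type; the type, its components and the program name are illustrative and not part of SCONE, and MPI_DOUBLE/MPI_INT are used for consistency with this patch (MPI_DOUBLE_PRECISION and MPI_INTEGER are the Fortran-native handles).

  program struct_type_demo
    use mpi_f08
    implicit none

    ! Hypothetical payload, standing in for particleStateDummy
    ! (assumes real(8)/integer(4) match MPI_DOUBLE/MPI_INT on this platform)
    type :: payload
      real(8)    :: wgt
      real(8)    :: r(3)
      integer(4) :: matIdx
    end type payload

    type(payload)                  :: sample
    type(MPI_Datatype)             :: MPI_PAYLOAD, types(3)
    integer(kind=MPI_ADDRESS_KIND) :: disp(3), base
    integer                        :: blockLen(3), ierr

    call mpi_init(ierr)

    ! One entry per component; r counts as a block of 3 doubles
    blockLen = [1, 3, 1]
    types    = [MPI_DOUBLE, MPI_DOUBLE, MPI_INT]

    ! Byte displacements, measured from the first component
    call mpi_get_address(sample % wgt,    disp(1), ierr)
    call mpi_get_address(sample % r,      disp(2), ierr)
    call mpi_get_address(sample % matIdx, disp(3), ierr)
    base = disp(1)
    disp = disp - base

    ! Create and commit the derived type, as mpiInitTypes does for MPI_PARTICLE_STATE
    call mpi_type_create_struct(3, blockLen, disp, types, MPI_PAYLOAD, ierr)
    call mpi_type_commit(MPI_PAYLOAD, ierr)

    ! ... MPI_PAYLOAD may now be used to send/receive type(payload) data ...

    call mpi_type_free(MPI_PAYLOAD, ierr)
    call mpi_finalize(ierr)

  end program struct_type_demo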
diff --git a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 index a937f0d39..1315d938e 100644 --- a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 +++ b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 @@ -19,7 +19,7 @@ module shannonEntropyClerk_class use mpi_func, only : isMPIMaster #ifdef MPI - use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, MASTER_RANK + use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DOUBLE, MPI_COMM_WORLD, MASTER_RANK #endif implicit none @@ -155,8 +155,8 @@ subroutine reportCycleEnd(self, end, mem) bufferArray = prob #ifdef MPI - call mpi_reduce(totWgt, buffer, 1, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) - call mpi_reduce(prob, bufferArray, self % N, MPI_DEFREAL, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(totWgt, buffer, 1, MPI_DOUBLE, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(prob, bufferArray, self % N, MPI_DOUBLE, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) #endif if (isMPIMaster()) then diff --git a/Tallies/TallyClerks/trackClerk_class.f90 b/Tallies/TallyClerks/trackClerk_class.f90 index f94ffb7e4..99a98a5e5 100644 --- a/Tallies/TallyClerks/trackClerk_class.f90 +++ b/Tallies/TallyClerks/trackClerk_class.f90 @@ -1,293 +1,293 @@ -module trackClerk_class - - use numPrecision - use tallyCodes - use genericProcedures, only : fatalError - use display_func, only : statusMsg - use dictionary_class, only : dictionary - use particle_class, only : particle, particleState - use outputFile_class, only : outputFile - use scoreMemory_class, only : scoreMemory - use tallyClerk_inter, only : tallyClerk, kill_super => kill - - ! Nuclear Data interface - use nuclearDatabase_inter, only : nuclearDatabase - - ! Tally Filters - use tallyFilter_inter, only : tallyFilter - use tallyFilterFactory_func, only : new_tallyFilter - - ! Tally Maps - use tallyMap_inter, only : tallyMap - use tallyMapFactory_func, only : new_tallyMap - - ! Tally Responses - use tallyResponseSlot_class, only : tallyResponseSlot - - implicit none - private - - !! - !! Track length estimator of reaction rates - !! Calculates flux weighted integrals from paticles travelled paths - !! - !! Private Members: - !! filter -> Space to store tally Filter - !! map -> Space to store tally Map - !! response -> Array of responses - !! width -> Number of responses (# of result bins for each map position) - !! - !! NOTE that maps and filters refer to the pre-transition particle state! This - !! would lead to wrong results in case of spatial grids (e.g. a Cartesian x-y grid) - !! - !! Interface - !! tallyClerk Interface - !! - !! SAMPLE DICTIOANRY INPUT: - !! - !! myTrackClerk { - !! type trackClerk; - !! # filter { } # - !! # map { } # - !! response (resName1 #resName2 ... #) - !! resName1 { } - !! #resNamew { run-time procedures - procedure :: reportPath - - ! Output procedures - procedure :: display - procedure :: print - - end type trackClerk - -contains - - !! - !! Initialise clerk from dictionary and name - !! - !! See tallyClerk_inter for details - !! - subroutine init(self, dict, name) - class(trackClerk), intent(inout) :: self - class(dictionary), intent(in) :: dict - character(nameLen), intent(in) :: name - character(nameLen),dimension(:),allocatable :: responseNames - integer(shortInt) :: i - - ! Assign name - call self % setName(name) - - ! Load filetr - if (dict % isPresent('filter')) then - call new_tallyFilter(self % filter, dict % getDictPtr('filter')) - end if - - ! 
Load map - if (dict % isPresent('map')) then - call new_tallyMap(self % map, dict % getDictPtr('map')) - end if - - ! Get names of response dictionaries - call dict % get(responseNames,'response') - - ! Load responses - allocate(self % response(size(responseNames))) - do i = 1, size(responseNames) - call self % response(i) % init(dict % getDictPtr( responseNames(i) )) - end do - - ! Set width - self % width = size(responseNames) - - end subroutine init - - !! - !! Return to uninitialised state - !! - elemental subroutine kill(self) - class(trackClerk), intent(inout) :: self - - ! Superclass - call kill_super(self) - - ! Kill and deallocate filter - if (allocated(self % filter)) then - deallocate(self % filter) - end if - - ! Kill and deallocate map - if (allocated(self % map)) then - call self % map % kill() - deallocate(self % map) - end if - - ! Kill and deallocate responses - if (allocated(self % response)) then - deallocate(self % response) - end if - - self % width = 0 - - end subroutine kill - - !! - !! Returns array of codes that represent diffrent reports - !! - !! See tallyClerk_inter for details - !! - function validReports(self) result(validCodes) - class(trackClerk),intent(in) :: self - integer(shortInt),dimension(:),allocatable :: validCodes - - validCodes = [path_CODE] - - end function validReports - - !! - !! Return memory size of the clerk - !! - !! See tallyClerk_inter for details - !! - elemental function getSize(self) result(S) - class(trackClerk), intent(in) :: self - integer(shortInt) :: S - - S = size(self % response) - if (allocated(self % map)) S = S * self % map % bins(0) - - end function getSize - - !! - !! Process incoming track length report - !! - !! See tallyClerk_inter for details - !! - subroutine reportPath(self, p, L, xsData,mem) - class(trackClerk), intent(inout) :: self - class(particle), intent(in) :: p - real(defReal), intent(in) :: L - class(nuclearDatabase), intent(inout) :: xsData - type(scoreMemory), intent(inout) :: mem - type(particleState) :: state - type(particle) :: pTmp - integer(shortInt) :: binIdx, i - integer(longInt) :: adrr - real(defReal) :: scoreVal, flx - character(100), parameter :: Here =' reportPath (trackClerk_class.f90)' - - ! Get pre-transition particle state - state = p % prePath - - ! Check if within filter - if (allocated( self % filter)) then - if (self % filter % isFail(state)) return - end if - - ! Find bin index - if (allocated(self % map)) then - binIdx = self % map % map(state) - else - binIdx = 1 - end if - - ! Return if invalid bin index - if (binIdx == 0) return - - ! Calculate bin address - adrr = self % getMemAddress() + self % width * (binIdx -1) - 1 - - ! tranfer information about Prestate material to a temporary particle - pTmp = p - pTmp % coords % matIdx = state % matIdx - - ! Calculate flux sample L = path travelled - flx = L - - ! Append all bins - do i = 1,self % width - scoreVal = self % response(i) % get(pTmp, xsData) * p % w * flx - call mem % score(scoreVal, adrr + i) - end do - - end subroutine reportPath - - !! - !! Display convergance progress on the console - !! - !! See tallyClerk_inter for details - !! - subroutine display(self, mem) - class(trackClerk), intent(in) :: self - type(scoreMemory), intent(in) :: mem - - call statusMsg('trackClerk does not support display yet') - - end subroutine display - - !! - !! Write contents of the clerk to output file - !! - !! See tallyClerk_inter for details - !! 
-  subroutine print(self, outFile, mem)
-    class(trackClerk), intent(in) :: self
-    class(outputFile), intent(inout) :: outFile
-    type(scoreMemory), intent(in) :: mem
-    real(defReal) :: val, std
-    integer(shortInt) :: i
-    integer(shortInt),dimension(:),allocatable :: resArrayShape
-    character(nameLen) :: name
-
-    ! Begin block
-    call outFile % startBlock(self % getName())
-
-    ! If track clerk has map print map information
-    if( allocated(self % map)) then
-      call self % map % print(outFile)
-    end if
-
-    ! Write results.
-    ! Get shape of result array
-    if(allocated(self % map)) then
-      resArrayShape = [size(self % response), self % map % binArrayShape()]
-    else
-      resArrayShape = [size(self % response)]
-    end if
-
-    ! Start array
-    name ='Res'
-    call outFile % startArray(name, resArrayShape)
-
-    ! Print results to the file
-    do i=1,product(resArrayShape)
-      call mem % getResult(val, std, self % getMemAddress() - 1 + i)
-      call outFile % addResult(val,std)
-
-    end do
-
-    call outFile % endArray()
-    call outFile % endBlock()
-
-  end subroutine print
-
-end module trackClerk_class
+module trackClerk_class
+
+  use numPrecision
+  use tallyCodes
+  use genericProcedures, only : fatalError
+  use display_func, only : statusMsg
+  use dictionary_class, only : dictionary
+  use particle_class, only : particle, particleState
+  use outputFile_class, only : outputFile
+  use scoreMemory_class, only : scoreMemory
+  use tallyClerk_inter, only : tallyClerk, kill_super => kill
+
+  ! Nuclear Data interface
+  use nuclearDatabase_inter, only : nuclearDatabase
+
+  ! Tally Filters
+  use tallyFilter_inter, only : tallyFilter
+  use tallyFilterFactory_func, only : new_tallyFilter
+
+  ! Tally Maps
+  use tallyMap_inter, only : tallyMap
+  use tallyMapFactory_func, only : new_tallyMap
+
+  ! Tally Responses
+  use tallyResponseSlot_class, only : tallyResponseSlot
+
+  implicit none
+  private
+
+  !!
+  !! Track length estimator of reaction rates
+  !! Calculates flux-weighted integrals from particles' travelled paths
+  !!
+  !! Private Members:
+  !!   filter -> Space to store tally Filter
+  !!   map -> Space to store tally Map
+  !!   response -> Array of responses
+  !!   width -> Number of responses (# of result bins for each map position)
+  !!
+  !! NOTE that maps and filters refer to the pre-transition particle state! This
+  !! would lead to wrong results in case of spatial grids (e.g. a Cartesian x-y grid)
+  !!
+  !! Interface
+  !!   tallyClerk Interface
+  !!
+  !! SAMPLE DICTIONARY INPUT:
+  !!
+  !! myTrackClerk {
+  !!   type trackClerk;
+  !!   # filter { <tallyFilter definition> } #
+  !!   # map { <tallyMap definition> } #
+  !!   response (resName1 #resName2 ... #)
+  !!   resName1 { <tallyResponse definition> }
+  !!   #resName2 { <tallyResponse definition> } #
+  !! }
+  !!
+  type, public, extends(tallyClerk) :: trackClerk
+    private
+    ! Filter, Map & Vector of Responses
+    class(tallyFilter), allocatable                  :: filter
+    class(tallyMap), allocatable                     :: map
+    type(tallyResponseSlot),dimension(:),allocatable :: response
+
+    ! Useful data
+    integer(shortInt) :: width = 0
+
+  contains
+    ! Procedures used during build
+    procedure :: init
+    procedure :: kill
+    procedure :: validReports
+    procedure :: getSize
+
+    ! File reports -> run-time procedures
+    procedure :: reportPath
+
+    ! Output procedures
+    procedure :: display
+    procedure :: print
+
+  end type trackClerk
+
+contains
+
+  !!
+  !! Initialise clerk from dictionary and name
+  !!
+  !! See tallyClerk_inter for details
+  !!
+  subroutine init(self, dict, name)
+    class(trackClerk), intent(inout) :: self
+    class(dictionary), intent(in) :: dict
+    character(nameLen), intent(in) :: name
+    character(nameLen),dimension(:),allocatable :: responseNames
+    integer(shortInt) :: i
+
+    ! Assign name
+    call self % setName(name)
+
+    ! Load filter
+    if (dict % isPresent('filter')) then
+      call new_tallyFilter(self % filter, dict % getDictPtr('filter'))
+    end if
+
+    ! Load map
+    if (dict % isPresent('map')) then
+      call new_tallyMap(self % map, dict % getDictPtr('map'))
+    end if
+
+    ! Get names of response dictionaries
+    call dict % get(responseNames,'response')
+
+    ! 
Load responses
+    allocate(self % response(size(responseNames)))
+    do i = 1, size(responseNames)
+      call self % response(i) % init(dict % getDictPtr( responseNames(i) ))
+    end do
+
+    ! Set width
+    self % width = size(responseNames)
+
+  end subroutine init
+
+  !!
+  !! Return to uninitialised state
+  !!
+  elemental subroutine kill(self)
+    class(trackClerk), intent(inout) :: self
+
+    ! Superclass
+    call kill_super(self)
+
+    ! Kill and deallocate filter
+    if (allocated(self % filter)) then
+      deallocate(self % filter)
+    end if
+
+    ! Kill and deallocate map
+    if (allocated(self % map)) then
+      call self % map % kill()
+      deallocate(self % map)
+    end if
+
+    ! Kill and deallocate responses
+    if (allocated(self % response)) then
+      deallocate(self % response)
+    end if
+
+    self % width = 0
+
+  end subroutine kill
+
+  !!
+  !! Returns array of codes that represent different reports
+  !!
+  !! See tallyClerk_inter for details
+  !!
+  function validReports(self) result(validCodes)
+    class(trackClerk),intent(in) :: self
+    integer(shortInt),dimension(:),allocatable :: validCodes
+
+    validCodes = [path_CODE]
+
+  end function validReports
+
+  !!
+  !! Return memory size of the clerk
+  !!
+  !! See tallyClerk_inter for details
+  !!
+  elemental function getSize(self) result(S)
+    class(trackClerk), intent(in) :: self
+    integer(shortInt) :: S
+
+    S = size(self % response)
+    if (allocated(self % map)) S = S * self % map % bins(0)
+
+  end function getSize
+
+  !!
+  !! Process incoming track length report
+  !!
+  !! See tallyClerk_inter for details
+  !!
+  subroutine reportPath(self, p, L, xsData,mem)
+    class(trackClerk), intent(inout) :: self
+    class(particle), intent(in) :: p
+    real(defReal), intent(in) :: L
+    class(nuclearDatabase), intent(inout) :: xsData
+    type(scoreMemory), intent(inout) :: mem
+    type(particleState) :: state
+    type(particle) :: pTmp
+    integer(shortInt) :: binIdx, i
+    integer(longInt) :: adrr
+    real(defReal) :: scoreVal, flx
+    character(100), parameter :: Here =' reportPath (trackClerk_class.f90)'
+
+    ! Get pre-transition particle state
+    state = p % prePath
+
+    ! Check if within filter
+    if (allocated( self % filter)) then
+      if (self % filter % isFail(state)) return
+    end if
+
+    ! Find bin index
+    if (allocated(self % map)) then
+      binIdx = self % map % map(state)
+    else
+      binIdx = 1
+    end if
+
+    ! Return if invalid bin index
+    if (binIdx == 0) return
+
+    ! Calculate bin address
+    adrr = self % getMemAddress() + self % width * (binIdx -1) - 1
+
+    ! Transfer information about the pre-transition material to a temporary particle
+    pTmp = p
+    pTmp % coords % matIdx = state % matIdx
+
+    ! Calculate flux sample L = path travelled
+    flx = L
+
+    ! Append all bins
+    do i = 1,self % width
+      scoreVal = self % response(i) % get(pTmp, xsData) * p % w * flx
+      call mem % score(scoreVal, adrr + i)
+    end do
+
+  end subroutine reportPath
+
+  !!
+  !! Display convergence progress on the console
+  !!
+  !! See tallyClerk_inter for details
+  !!
+  subroutine display(self, mem)
+    class(trackClerk), intent(in) :: self
+    type(scoreMemory), intent(in) :: mem
+
+    call statusMsg('trackClerk does not support display yet')
+
+  end subroutine display
+
+  !!
+  !! Write contents of the clerk to output file
+  !!
+  !! See tallyClerk_inter for details
+  !! 
+ subroutine print(self, outFile, mem) + class(trackClerk), intent(in) :: self + class(outputFile), intent(inout) :: outFile + type(scoreMemory), intent(in) :: mem + real(defReal) :: val, std + integer(shortInt) :: i + integer(shortInt),dimension(:),allocatable :: resArrayShape + character(nameLen) :: name + + ! Begin block + call outFile % startBlock(self % getName()) + + ! If track clerk has map print map information + if( allocated(self % map)) then + call self % map % print(outFile) + end if + + ! Write results. + ! Get shape of result array + if(allocated(self % map)) then + resArrayShape = [size(self % response), self % map % binArrayShape()] + else + resArrayShape = [size(self % response)] + end if + + ! Start array + name ='Res' + call outFile % startArray(name, resArrayShape) + + ! Print results to the file + do i=1,product(resArrayShape) + call mem % getResult(val, std, self % getMemAddress() - 1 + i) + call outFile % addResult(val,std) + + end do + + call outFile % endArray() + call outFile % endBlock() + + end subroutine print + +end module trackClerk_class diff --git a/Tallies/Tests/scoreMemory_test.f90 b/Tallies/Tests/scoreMemory_test.f90 index 0c16a437d..7c1aeb5f0 100644 --- a/Tallies/Tests/scoreMemory_test.f90 +++ b/Tallies/Tests/scoreMemory_test.f90 @@ -1,302 +1,302 @@ -module scoreMemory_test - use numPrecision - use genericProcedures, only : numToChar - use scoreMemory_class, only : scoreMemory - use pFUnit_mod - - implicit none - -@testParameter(constructor = new_testNumber) - type, extends(AbstractTestParameter) :: testNumber - integer(shortInt) :: i - contains - procedure :: toString - end type testNumber - -@testCase(constructor=newTest) - type, extends(ParameterizedTestCase) :: test_scoreMemory - private - integer(longInt) :: Ncycles - integer(shortInt) :: batchSize - real(defReal),dimension(:), allocatable :: scores - integer(shortInt), dimension(:),allocatable :: scoresInt - - end type test_scoreMemory - - -contains - - !! - !! Build new test parameter form integer - !! - function new_testNumber(i) result (tstNum) - integer(shortInt) :: i - type(testNumber) :: tstNum - - tstNum % i = i - - end function new_testNumber - - !! - !! Write test parameter to string - !! - function toString(this) result(string) - class(testNumber), intent(in) :: this - character(:), allocatable :: string - character(nameLen) :: str - - write (str,*) this % i - string = str - - end function toString - - !! - !! Construct test case - !! - !! - !! - function newTest(testParam) result(tst) - type(testNumber), intent(in) :: testParam - type(test_scoreMemory) :: tst - real(defReal),dimension(200) :: random - integer(shortInt) :: seed, i - integer(shortInt),parameter :: A = 2469 ! Multiplier of LC PRNG - integer(shortInt),parameter :: M = 65521 ! Modulus of PRNG - - ! Load batchSize - tst % batchSize = testParam % i - tst % Ncycles = 10 * tst % batchSize - - ! Generate a vector of 20 pseudo-random numbers in <0;1> - ! Generator is not sophisticated but robust - seed = 9294 - do i=1,200 - seed = mod(A * seed , M) - random(i) = seed / real(M,defReal) - end do - - ! Generate some scores and calculate their sum and sum of squares - tst % scores = TWO + sin(PI * random - PI/2) - tst % scoresInt = int(random * 100, shortInt) - - end function newTest - -!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> -!! PROPER TESTS BEGIN HERE -!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - !! - !! 
Test acoring for a case with batchSize == 1 - !! Look at the end of the file to find MATLAB SCRIPT used to generate reference values - !! -@Test(cases=[1]) - subroutine testScoring(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - integer(shortInt) :: i, j - real(defReal) :: res1, res2, STD - real(defReal), parameter :: TOL = 1.0E-9 - - ! Initialise score memory - call mem % init(7_longInt, 1, batchSize = this % batchSize) - - ! Test getting batchSize - @assertEqual(this % batchSize, mem % getBatchSize(),'Test getBatchSize() :') - - ! Score in - do i=1,10 - ! Score - do j=20*(i-1)+1,20 * i - call mem % score(this % scores(j), 1_longInt) - call mem % score(this % scoresInt(j), 2_longInt) - call mem % score(int(this % scoresInt(j),longInt),3_longInt) - call mem % accumulate(this % scores(j), 4_longInt) - call mem % accumulate(this % scoresInt(j), 5_longInt) - call mem % accumulate(int(this % scoresInt(j),longInt),6_longInt) - - end do - call mem % reduceBins() - ! Close a single bin with diffrent normalisation - call mem % closeBin(1.2_defReal, 3_longInt) - - ! Close Cycle - call mem % closeCycle(0.7_defReal) - - end do - - ! Get results from bin 1 - call mem % getResult(res1, 1_longInt) - call mem % getResult(res2, STD, 1_longInt) - - @assertEqual(26.401471259728442_defReal, res1, TOL) - @assertEqual(26.401471259728442_defReal, res2, TOL) - @assertEqual(0.645969443981583_defReal, STD, TOL) - - ! Get results from bin 2 - call mem % getResult(res1, 2_longInt) - call mem % getResult(res2, STD, 2_longInt) - - @assertEqual(623.0_defReal, res1, TOL) - @assertEqual(623.0_defReal, res2, TOL) - @assertEqual(27.982494527829360_defReal, STD, TOL) - - ! Get results from bin 3 - call mem % getResult(res1, 3_longInt) - call mem % getResult(res2, STD, 3_longInt) - - @assertEqual(1068.0_defReal, res1, TOL) - @assertEqual(1068.0_defReal, res2, TOL) - @assertEqual(47.969990619136050_defReal, STD, TOL) - - ! Get results from bin 4 - call mem % getResult(res1, 4_longInt, 200) - call mem % getResult(res2, STD, 4_longInt, 200) - - @assertEqual(1.885819375694888_defReal, res1, TOL) - @assertEqual(1.885819375694888_defReal, res2, TOL) - @assertEqual(0.049102082638055_defReal, STD, TOL) - - ! Get results from bin 5 - call mem % getResult(res1, 5_longInt, 200) - call mem % getResult(res2, STD, 5_longInt, 200) - - @assertEqual(44.500000000000000_defReal, res1, TOL) - @assertEqual(44.500000000000000_defReal, res2, TOL) - @assertEqual(2.015580019267494_defReal, STD, TOL) - - ! Get results from bin 6 - call mem % getResult(res1, 6_longInt, 200) - call mem % getResult(res2, STD, 6_longInt, 200) - - @assertEqual(44.500000000000000_defReal, res1, TOL) - @assertEqual(44.500000000000000_defReal, res2, TOL) - @assertEqual(2.015580019267494_defReal, STD, TOL) - - ! Get results from an empty bin 7 - call mem % getResult(res1, 7_longInt) - call mem % getResult(res2, STD, 7_longInt) - - @assertEqual(ZERO, res1, TOL) - @assertEqual(ZERO, res2, TOL) - @assertEqual(ZERO, STD, TOL) - - ! Get results from invalid bins - call mem % getResult(res1, -7_longInt) - call mem % getResult(res2, STD, -7_longInt) - - @assertEqual(ZERO, res1, TOL) - @assertEqual(ZERO, res2, TOL) - @assertEqual(ZERO, STD, TOL) - - call mem % getResult(res1, 8_longInt) - call mem % getResult(res2, STD, 8_longInt) - - @assertEqual(ZERO, res1, TOL) - @assertEqual(ZERO, res2, TOL) - @assertEqual(ZERO, STD, TOL) - - ! Free memory - call mem % kill() - - end subroutine testScoring - - !! - !! Test lastCycle - !! 
Ignors test parametrisation - !! -@Test(cases=[1]) - subroutine testLastCycle(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - integer(shortInt) :: i - - call mem % init(1_longInt, 1, batchSize = 8) - - ! Test getting batchSize - @assertEqual(8, mem % getBatchSize(),'Test getBatchSize() :') - - do i=1,16 - if(i == 8 .or. i == 16) then - @assertTrue( mem % lastCycle(), 'In cycle num: '//numToChar(i)) - else - @assertFalse( mem % lastCycle(), 'In cycle num: '//numToChar(i)) - end if - call mem % closeCycle(ONE) - end do - - call mem % kill() - - end subroutine testLastCycle - - !! - !! Test get score - !! Ignore test parametrisation - !! -@Test(cases=[1]) - subroutine testGetScore(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - real(defReal),parameter :: TOL = 1.0E-9 - - call mem % init(1_longInt, 1) - - call mem % score(ONE,1_longInt) - call mem % score(ONE,1_longInt) - call mem % score(ONE,1_longInt) - call mem % reduceBins() - - @assertEqual(3*ONE, mem % getScore(1_longInt), TOL, 'Test getScore, valid bin:') - @assertEqual(ZERO, mem % getScore(0_longInt), TOL, 'Test getScore, not +ve bin:') - @assertEqual(ZERO, mem % getScore(2_longInt), TOL, 'Test getScore, too large bin:') - - end subroutine testGetScore - - !! - !! Test killing uninitialised scoreMemory - !! -@Test(cases=[1]) - subroutine testKillUnalloc(this) - class(test_scoreMemory), intent(inout) :: this - type(scoreMemory) :: mem - - call mem % kill() - - end subroutine testKillUnalloc - -end module scoreMemory_test -!! MATLAB SCRIPT USED TO GENERATE REFERENCE VALUES -!clear -!rand = zeros(20,1); -!seed = 9294; -! -!%LCG Params -!A = 2469; -!M = 65521; -! -!for i=1:1:200 -! seed = mod(A * seed, M); -! rand(i) = seed/M; -!end -! -!% Calculate scores vector -!scores = 2.0 + sin(pi() .* rand - pi()/2); -!scoresInt = floor(100.*rand); -! -!% Accumulate results -!resAcc = mean(scores) -!stdAcc = sqrt(var(scores)./200) -! -!resAccInt = mean(scoresInt) -!stdAccInt = sqrt(var(scoresInt)./200) -! -!% Reshape scores -!scores = reshape(scores,[20,10]); -!scores = sum(scores,1)* 0.7; -!res = mean(scores) -!std = sqrt(var(scores)./10) -! -!% Reshape scores -!scoresInt = reshape(scoresInt,[20,10]); -!scoresInt = sum(scoresInt,1)* 0.7; -!resInt = mean(scoresInt) +module scoreMemory_test + use numPrecision + use genericProcedures, only : numToChar + use scoreMemory_class, only : scoreMemory + use funit + + implicit none + +@testParameter(constructor = new_testNumber) + type, extends(AbstractTestParameter) :: testNumber + integer(shortInt) :: i + contains + procedure :: toString + end type testNumber + +@testCase(constructor=newTest) + type, extends(ParameterizedTestCase) :: test_scoreMemory + private + integer(longInt) :: Ncycles + integer(shortInt) :: batchSize + real(defReal),dimension(:), allocatable :: scores + integer(shortInt), dimension(:),allocatable :: scoresInt + + end type test_scoreMemory + + +contains + + !! + !! Build new test parameter form integer + !! + function new_testNumber(i) result (tstNum) + integer(shortInt) :: i + type(testNumber) :: tstNum + + tstNum % i = i + + end function new_testNumber + + !! + !! Write test parameter to string + !! + function toString(this) result(string) + class(testNumber), intent(in) :: this + character(:), allocatable :: string + character(nameLen) :: str + + write (str,*) this % i + string = str + + end function toString + + !! + !! Construct test case + !! + !! + !! 
+  function newTest(testParam) result(tst)
+    type(testNumber), intent(in) :: testParam
+    type(test_scoreMemory) :: tst
+    real(defReal),dimension(200) :: random
+    integer(shortInt) :: seed, i
+    integer(shortInt),parameter :: A = 2469 ! Multiplier of LC PRNG
+    integer(shortInt),parameter :: M = 65521 ! Modulus of PRNG
+
+    ! Load batchSize
+    tst % batchSize = testParam % i
+    tst % Ncycles = 10 * tst % batchSize
+
+    ! Generate a vector of 200 pseudo-random numbers in <0;1>
+    ! Generator is not sophisticated but robust
+    seed = 9294
+    do i=1,200
+      seed = mod(A * seed , M)
+      random(i) = seed / real(M,defReal)
+    end do
+
+    ! Generate some scores and calculate their sum and sum of squares
+    tst % scores = TWO + sin(PI * random - PI/2)
+    tst % scoresInt = int(random * 100, shortInt)
+
+  end function newTest
+
+!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
+!! PROPER TESTS BEGIN HERE
+!!<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
+
+  !!
+  !! Test scoring for a case with batchSize == 1
+  !! Look at the end of the file to find MATLAB SCRIPT used to generate reference values
+  !!
+@Test(cases=[1])
+  subroutine testScoring(this)
+    class(test_scoreMemory), intent(inout) :: this
+    type(scoreMemory) :: mem
+    integer(shortInt) :: i, j
+    real(defReal) :: res1, res2, STD
+    real(defReal), parameter :: TOL = 1.0E-9
+
+    ! Initialise score memory
+    call mem % init(7_longInt, 1, batchSize = this % batchSize)
+
+    ! Test getting batchSize
+    @assertEqual(this % batchSize, mem % getBatchSize(),'Test getBatchSize() :')
+
+    ! Score in
+    do i=1,10
+      ! Score
+      do j=20*(i-1)+1,20 * i
+        call mem % score(this % scores(j), 1_longInt)
+        call mem % score(this % scoresInt(j), 2_longInt)
+        call mem % score(int(this % scoresInt(j),longInt),3_longInt)
+        call mem % accumulate(this % scores(j), 4_longInt)
+        call mem % accumulate(this % scoresInt(j), 5_longInt)
+        call mem % accumulate(int(this % scoresInt(j),longInt),6_longInt)
+
+      end do
+      call mem % reduceBins()
+      ! Close a single bin with different normalisation
+      call mem % closeBin(1.2_defReal, 3_longInt)
+
+      ! Close Cycle
+      call mem % closeCycle(0.7_defReal)
+
+    end do
+
+    ! Get results from bin 1
+    call mem % getResult(res1, 1_longInt)
+    call mem % getResult(res2, STD, 1_longInt)
+
+    @assertEqual(26.401471259728442_defReal, res1, TOL)
+    @assertEqual(26.401471259728442_defReal, res2, TOL)
+    @assertEqual(0.645969443981583_defReal, STD, TOL)
+
+    ! Get results from bin 2
+    call mem % getResult(res1, 2_longInt)
+    call mem % getResult(res2, STD, 2_longInt)
+
+    @assertEqual(623.0_defReal, res1, TOL)
+    @assertEqual(623.0_defReal, res2, TOL)
+    @assertEqual(27.982494527829360_defReal, STD, TOL)
+
+    ! Get results from bin 3
+    call mem % getResult(res1, 3_longInt)
+    call mem % getResult(res2, STD, 3_longInt)
+
+    @assertEqual(1068.0_defReal, res1, TOL)
+    @assertEqual(1068.0_defReal, res2, TOL)
+    @assertEqual(47.969990619136050_defReal, STD, TOL)
+
+    ! Get results from bin 4
+    call mem % getResult(res1, 4_longInt, 200)
+    call mem % getResult(res2, STD, 4_longInt, 200)
+
+    @assertEqual(1.885819375694888_defReal, res1, TOL)
+    @assertEqual(1.885819375694888_defReal, res2, TOL)
+    @assertEqual(0.049102082638055_defReal, STD, TOL)
+
+    ! 
Get results from bin 5 + call mem % getResult(res1, 5_longInt, 200) + call mem % getResult(res2, STD, 5_longInt, 200) + + @assertEqual(44.500000000000000_defReal, res1, TOL) + @assertEqual(44.500000000000000_defReal, res2, TOL) + @assertEqual(2.015580019267494_defReal, STD, TOL) + + ! Get results from bin 6 + call mem % getResult(res1, 6_longInt, 200) + call mem % getResult(res2, STD, 6_longInt, 200) + + @assertEqual(44.500000000000000_defReal, res1, TOL) + @assertEqual(44.500000000000000_defReal, res2, TOL) + @assertEqual(2.015580019267494_defReal, STD, TOL) + + ! Get results from an empty bin 7 + call mem % getResult(res1, 7_longInt) + call mem % getResult(res2, STD, 7_longInt) + + @assertEqual(ZERO, res1, TOL) + @assertEqual(ZERO, res2, TOL) + @assertEqual(ZERO, STD, TOL) + + ! Get results from invalid bins + call mem % getResult(res1, -7_longInt) + call mem % getResult(res2, STD, -7_longInt) + + @assertEqual(ZERO, res1, TOL) + @assertEqual(ZERO, res2, TOL) + @assertEqual(ZERO, STD, TOL) + + call mem % getResult(res1, 8_longInt) + call mem % getResult(res2, STD, 8_longInt) + + @assertEqual(ZERO, res1, TOL) + @assertEqual(ZERO, res2, TOL) + @assertEqual(ZERO, STD, TOL) + + ! Free memory + call mem % kill() + + end subroutine testScoring + + !! + !! Test lastCycle + !! Ignors test parametrisation + !! +@Test(cases=[1]) + subroutine testLastCycle(this) + class(test_scoreMemory), intent(inout) :: this + type(scoreMemory) :: mem + integer(shortInt) :: i + + call mem % init(1_longInt, 1, batchSize = 8) + + ! Test getting batchSize + @assertEqual(8, mem % getBatchSize(),'Test getBatchSize() :') + + do i=1,16 + if(i == 8 .or. i == 16) then + @assertTrue( mem % lastCycle(), 'In cycle num: '//numToChar(i)) + else + @assertFalse( mem % lastCycle(), 'In cycle num: '//numToChar(i)) + end if + call mem % closeCycle(ONE) + end do + + call mem % kill() + + end subroutine testLastCycle + + !! + !! Test get score + !! Ignore test parametrisation + !! +@Test(cases=[1]) + subroutine testGetScore(this) + class(test_scoreMemory), intent(inout) :: this + type(scoreMemory) :: mem + real(defReal),parameter :: TOL = 1.0E-9 + + call mem % init(1_longInt, 1) + + call mem % score(ONE,1_longInt) + call mem % score(ONE,1_longInt) + call mem % score(ONE,1_longInt) + call mem % reduceBins() + + @assertEqual(3*ONE, mem % getScore(1_longInt), TOL, 'Test getScore, valid bin:') + @assertEqual(ZERO, mem % getScore(0_longInt), TOL, 'Test getScore, not +ve bin:') + @assertEqual(ZERO, mem % getScore(2_longInt), TOL, 'Test getScore, too large bin:') + + end subroutine testGetScore + + !! + !! Test killing uninitialised scoreMemory + !! +@Test(cases=[1]) + subroutine testKillUnalloc(this) + class(test_scoreMemory), intent(inout) :: this + type(scoreMemory) :: mem + + call mem % kill() + + end subroutine testKillUnalloc + +end module scoreMemory_test +!! MATLAB SCRIPT USED TO GENERATE REFERENCE VALUES +!clear +!rand = zeros(20,1); +!seed = 9294; +! +!%LCG Params +!A = 2469; +!M = 65521; +! +!for i=1:1:200 +! seed = mod(A * seed, M); +! rand(i) = seed/M; +!end +! +!% Calculate scores vector +!scores = 2.0 + sin(pi() .* rand - pi()/2); +!scoresInt = floor(100.*rand); +! +!% Accumulate results +!resAcc = mean(scores) +!stdAcc = sqrt(var(scores)./200) +! +!resAccInt = mean(scoresInt) +!stdAccInt = sqrt(var(scoresInt)./200) +! +!% Reshape scores +!scores = reshape(scores,[20,10]); +!scores = sum(scores,1)* 0.7; +!res = mean(scores) +!std = sqrt(var(scores)./10) +! 
+!% Reshape scores +!scoresInt = reshape(scoresInt,[20,10]); +!scoresInt = sum(scoresInt,1)* 0.7; +!resInt = mean(scoresInt) !stdInt = sqrt(var(scoresInt)./10) diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index ffb5e2951..2d0a6fabb 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -1,598 +1,598 @@ -module scoreMemory_class - - use numPrecision -#ifdef MPI - use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DEFREAL, MPI_COMM_WORLD, & - MASTER_RANK, isMPIMaster, MPI_SHORTINT -#endif - use universalVariables, only : array_pad - use genericProcedures, only : fatalError, numToChar - use openmp_func, only : ompGetMaxThreads, ompGetThreadNum - - implicit none - private - - !! Parameters for indexes of per cycle SCORE, Cumulative Sum and Cumulative Sum of squares - integer(shortInt), parameter :: BIN = 1, & - CSUM = 2, & - CSUM2 = 3 - - !! Size of the 2nd Dimension of bins - integer(shortInt), parameter :: DIM2 = 3 - - - !! - !! scoreMemory is a class that stores space for scores from tallies. - !! It is separate from tallyClerks and individual responses to allow: - !! -> Easy writing and (later) reading from file for archivisation of results - !! -> Easy possibility of extention to tally higher moments of result - !! -> Possibility of extension to tally covariance of selected tally bins - !! -> Easy copying and recombination of results for OpenMP shared memory parallelism - !! -> Easy, output format-independent way to perform regression tests - !! -> Easy handling of different batch sizes - !! - !! For every bin index there are two positions: CSUM, CSUM2. All are initialised to 0. - !! For scoring, an array is created with dimension (Nbins,nThreads) to mitigate false sharing. - !! On accumulation, this array adds to the normal bin array. - !! - !! Interface: - !! init(N,idBS): Initialise with integer size N and integer id. Optional integer Batch Size. - !! - !! kill(): Elemental. Return to uninitialised state. - !! - !! score(score,idx): Score in the bin under idx. FatalError if idx is outside bounds. Score - !! is defReal, shortInt or longInt - !! - !! accumulate(score,idx): Accumulate result in cumulative sums in bin under idx. FatalError - !! if idx is outside bounds. Score is defReal, shortInt or longInt. - !! - !! getResult(mean, STD, idx, samples): Retrieve mean value and standard deviation of the - !! estimate under idx. Use optional samples to specify number of estimates used to - !! evaluate mean and STD from default, which is number of batches in score memory. - !! STD is optional. - !! - !! getScore(idx): Return current value of score from bin under idx. FatalError if idx is - !! outside bounds. - !! - !! closeBin(normFactor,idx): Multiplies score under bin by normFactor and accumulates it in - !! cumulative sums. Then sets the bin to zero. - !! - !! closeCycle(normFactor): Multiplies all scores by normFactor and accumulates them in - !! cumulative sums. Sets all scors to zero. - !! - !! lastCycle(): Return true if the next call to closeCycle will close a batch. - !! - !! getBatchSize(): Returns number of cycles that constitute a single batch. - !! - !! reduceBins(): Move the scores from parallelBins and different processes to bins. - !! - !! Example use case: - !! - !! do batches=1,20 - !! do hist=1,10 - !! call scoreMem % score(hist,1) ! Score hist (1,10) in bin 1 - !! call scoreMem % accumulate(hist,2) ! Accumulate hist in CSUMs of bin 2 - !! end do - !! call scoreMem % closeCycle(ONE) ! 
Close batch without normalisation (factor = ONE) - !! end do - !! - !! call scoreMem % getResult(mean,STD,1) ! Get result from bin 1 with STD - !! call scoreMem % getResult(mean,2,200) ! Get mean from bin 2 assuming 200 samples - !! - !! NOTE: Following indexing is used in bins class member - !! bins(binIndex,binType) binType is CSUM/CSUM2 - !! NOTE2: If batch size is not a denominator of cycles scored results accumulated - !! in extra cycles are discarded in current implementation - !! - type, public :: scoreMemory - !private - real(defReal),dimension(:,:),allocatable :: bins !! Space for storing cumul data (2nd dim size is always 3!) - real(defReal),dimension(:,:),allocatable :: parallelBins !! Space for scoring for different threads - integer(longInt) :: N = 0 !! Size of memory (number of bins) - integer(shortInt) :: nThreads = 0 !! Number of threads used for parallelBins - integer(shortInt) :: id !! Id of the tally - integer(shortInt) :: batchN = 0 !! Number of Batches - integer(shortInt) :: cycles = 0 !! Cycles counter - integer(shortInt) :: batchSize = 1 !! Batch interval size (in cycles) - logical(defBool) :: reduced = .false. !! True if bins have been reduced - contains - ! Interface procedures - procedure :: init - procedure :: kill - generic :: score => score_defReal, score_shortInt, score_longInt - generic :: accumulate => accumulate_defReal, accumulate_shortInt, accumulate_longInt - generic :: getResult => getResult_withSTD, getResult_withoutSTD - procedure :: getScore - procedure :: closeCycle - procedure :: closeBin - procedure :: lastCycle - procedure :: getBatchSize - procedure :: reduceBins - procedure :: collectDistributed - - ! Private procedures - procedure, private :: score_defReal - procedure, private :: score_shortInt - procedure, private :: score_longInt - procedure, private :: accumulate_defReal - procedure, private :: accumulate_shortInt - procedure, private :: accumulate_longInt - procedure, private :: getResult_withSTD - procedure, private :: getResult_withoutSTD - - end type scoreMemory - -contains - - !! - !! Allocate space for the bins given number of bins N - !! Optionaly change batchSize from 1 to any +ve number - !! - subroutine init(self, N, id, batchSize, reduced) - class(scoreMemory),intent(inout) :: self - integer(longInt),intent(in) :: N - integer(shortInt),intent(in) :: id - integer(shortInt),optional,intent(in) :: batchSize - logical(defBool),optional,intent(in) :: reduced - character(100), parameter :: Here= 'init (scoreMemory_class.f90)' - - ! Allocate space and zero all bins - allocate(self % bins(N, DIM2)) - self % bins = ZERO - - self % nThreads = ompGetMaxThreads() - - ! Note the array padding to avoid false sharing - allocate(self % parallelBins(N + array_pad, self % nThreads)) - self % parallelBins = ZERO - - ! Save size of memory - self % N = N - - ! Assign memory id - self % id = id - - ! Set batchN, cycles and batchSize to default values - self % batchN = 0 - self % cycles = 0 - self % batchSize = 1 - - if (present(batchSize)) then - if (batchSize > 0) then - self % batchSize = batchSize - else - call fatalError(Here,'Batch Size of: '// numToChar(batchSize) //' is invalid') - end if - end if - - if (present(reduced)) then - self % reduced = reduced - end if - - end subroutine init - - !! - !! Deallocate memory and return to uninitialised state - !! 
- subroutine kill(self) - class(scoreMemory), intent(inout) :: self - - if(allocated(self % bins)) deallocate(self % bins) - if(allocated(self % parallelBins)) deallocate(self % parallelBins) - self % N = 0 - self % nThreads = 0 - self % batchN = 0 - - end subroutine kill - - !! - !! Score a result on a given single bin under idx - !! - subroutine score_defReal(self, score, idx) - class(scoreMemory), intent(inout) :: self - real(defReal), intent(in) :: score - integer(longInt), intent(in) :: idx - integer(shortInt) :: thread_idx - character(100),parameter :: Here = 'score_defReal (scoreMemory_class.f90)' - - ! Verify bounds for the index - if( idx < 0_longInt .or. idx > self % N) then - call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & - & memory with size '//numToChar(self % N)) - end if - - ! Add the score - thread_idx = ompGetThreadNum() + 1 - self % parallelBins(idx, thread_idx) = & - self % parallelBins(idx, thread_idx) + score - - end subroutine score_defReal - - !! - !! Score a result with shortInt on a given bin under idx - !! - subroutine score_shortInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(shortInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % score_defReal(real(score, defReal), idx) - - end subroutine score_shortInt - - !! - !! Score a result with longInt on a given bin under idx - !! - subroutine score_longInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(longInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % score_defReal(real(score, defReal), idx) - - end subroutine score_longInt - - !! - !! Increment the result directly on cumulative sums - !! - subroutine accumulate_defReal(self, score, idx) - class(scoreMemory), intent(inout) :: self - real(defReal), intent(in) :: score - integer(longInt), intent(in) :: idx - character(100),parameter :: Here = 'accumulate_defReal (scoreMemory_class.f90)' - - ! Verify bounds for the index - if( idx < 0_longInt .or. idx > self % N) then - call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & - & memory with size '//numToChar(self % N)) - end if - - ! Add the score - self % bins(idx, CSUM) = self % bins(idx, CSUM) + score - self % bins(idx, CSUM2) = self % bins(idx, CSUM2) + score * score - - end subroutine accumulate_defReal - - !! - !! Increment the result directly on cumulative sums with shortInt score - !! - subroutine accumulate_shortInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(shortInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % accumulate_defReal(real(score, defReal), idx) - - end subroutine accumulate_shortInt - - !! - !! Increment the result directly on cumulative sums with longInt score - !! - subroutine accumulate_longInt(self, score, idx) - class(scoreMemory), intent(inout) :: self - integer(longInt), intent(in) :: score - integer(longInt), intent(in) :: idx - - call self % accumulate_defReal(real(score, defReal), idx) - - end subroutine accumulate_longInt - - !! - !! Close Cycle - !! Increments cycle counter and detects end-of-batch - !! When batch finishes it normalises all scores by the factor and moves them to CSUMs - !! - subroutine closeCycle(self, normFactor) - class(scoreMemory), intent(inout) :: self - real(defReal),intent(in) :: normFactor - integer(longInt) :: i - real(defReal), save :: res - !$omp threadprivate(res) - - ! 
Increment Cycle Counter - self % cycles = self % cycles + 1 - - if (mod(self % cycles, self % batchSize) == 0) then ! Close Batch - - !$omp parallel do - do i = 1, self % N - - ! Normalise scores - res = self % bins(i, BIN) * normFactor - - ! Zero all score bins - self % bins(i, BIN) = ZERO - - ! Increment cumulative sums - self % bins(i,CSUM) = self % bins(i,CSUM) + res - self % bins(i,CSUM2) = self % bins(i,CSUM2) + res * res - - end do - !$omp end parallel do - - ! Increment batch counter - self % batchN = self % batchN + 1 - - end if - - end subroutine closeCycle - - !! - !! Close Cycle - !! Multiplies score in bin under idx by normFactor, accumulates it and sets it to zero - !! - subroutine closeBin(self, normFactor, idx) - class(scoreMemory), intent(inout) :: self - real(defReal),intent(in) :: normFactor - integer(longInt), intent(in) :: idx - real(defReal) :: res - character(100),parameter :: Here = 'closeBin (scoreMemory_class.f90)' - - ! Verify bounds for the index - if (idx < 0_longInt .or. idx > self % N) then - call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & - & memory with size '//numToChar(self % N)) - end if - - ! Normalise score - res = self % bins(idx, BIN) * normFactor - - ! Increment cumulative sum - self % bins(idx,CSUM) = self % bins(idx,CSUM) + res - self % bins(idx,CSUM2) = self % bins(idx,CSUM2) + res * res - - ! Zero the score - self % bins(idx, BIN) = ZERO - - end subroutine closeBin - - - !! - !! Return true if next closeCycle will close a batch - !! - function lastCycle(self) result(isIt) - class(scoreMemory), intent(in) :: self - logical(defBool) :: isIt - - isIt = mod(self % cycles + 1, self % batchSize) == 0 - - end function lastCycle - - !! - !! Return batchSize - !! - pure function getBatchSize(self) result(S) - class(scoreMemory), intent(in) :: self - integer(shortInt) :: S - - S = self % batchSize - - end function getBatchSize - - !! - !! Combine the bins across threads and processes - !! - !! NOTE: - !! Need to be called before reporting CycleEnd to the clerks or calling closeCycle. - !! If it is not the case the results will be incorrect. This is not ideal design - !! and probably should be improved in the future. - !! - subroutine reduceBins(self) - class(scoreMemory), intent(inout) :: self - integer(longInt) :: i - character(100),parameter :: Here = 'reduceBins (scoreMemory_class.f90)' - - if (self % lastCycle()) then - - !$omp parallel do - do i = 1, self % N - self % bins(i, BIN) = sum(self % parallelBins(i,:)) - self % parallelBins(i,:) = ZERO - end do - !$omp end parallel do - - ! Reduce across processes - ! We use the parallelBins array as a temporary storage -#ifdef MPI - ! Since the number of bins is limited by 64bit signed integer and the - ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need - ! to split the reduction operation into chunks - if (self % reduced) then - call reduceArray(self % bins(:,BIN), self % parallelBins(:,1)) - end if -#endif - - end if - - end subroutine reduceBins - - !! - !! Reduce the accumulated results (csum and csum2) from different MPI processes - !! - !! If the bins are `reduced` (that is, scores are collected at the end of each cycle), - !! then this subroutine does nothing. Otherwise it collects the results from different - !! processes and stores them in the master process. - !! - !! The estimates from each process are treated as independent simulation, thus the - !! cumulative sums are added together and the batch count is summed. - !! 
- subroutine collectDistributed(self) - class(scoreMemory), intent(inout) :: self -#ifdef MPI - integer(shortInt) :: error, buffer - - if (.not. self % reduced) then - ! Reduce the batch count - ! Note we need to use size 1 arrays to fit the interface of mpi_reduce, which expects - ! to be given arrays - call mpi_reduce(self % batchN, buffer, 1, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) - if (isMPIMaster()) then - self % batchN = buffer - else - self % batchN = 0 - end if - - ! Reduce the cumulative sums - call reduceArray(self % bins(:,CSUM), self % parallelBins(:,1)) - - ! Reduce the cumulative sums of squares - call reduceArray(self % bins(:,CSUM2), self % parallelBins(:,1)) - end if - -#endif - end subroutine collectDistributed - - !! - !! Reduce the array across different processes - !! - !! Wrapper around MPI_Reduce to support arrays of defReal larger than 2^31 - !! This function is only defined if MPI is enabled - !! - !! Args: - !! data [inout] -> Array with the date to be reduced - !! buffer [inout] -> Buffer to store the reduced data (must be same size or larger than data) - !! - !! Result: - !! The sum of the data across all processes in stored on master process `data` - !! The buffer is set to ZERO on all processes (only 1:size(data) range)! - !! - !! Errors: - !! fatalError if size of the buffer is insufficient - !! -#ifdef MPI - subroutine reduceArray(data, buffer) - real(defReal), dimension(:), intent(inout) :: data - real(defReal), dimension(:), intent(inout) :: buffer - integer(longInt) :: N, chunk, start - integer(shortInt) :: error - character(100),parameter :: Here = 'reduceArray (scoreMemory_class.f90)' - - ! We need to be careful to support sizes larger than 2^31 - N = size(data, kind = longInt) - - ! Check if the buffer is large enough - if (size(buffer, kind = longInt) < N) then - call fatalError(Here, 'Buffer is too small to store the reduced data') - end if - - ! Since the number of bins is limited by 64bit signed integer and the - ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need - ! to split the reduction operation into chunks - start = 1 - - do while (start <= N) - - chunk = min(N - start + 1, int(huge(1_shortInt), longInt)) - call mpi_reduce(data(start : start + chunk - 1), & - buffer(start : start + chunk - 1), & - int(chunk, shortInt), & - MPI_DEFREAL, & - MPI_SUM, & - MASTER_RANK, & - MPI_COMM_WORLD, & - error) - - start = start + chunk - - end do - - ! Copy the result back to data - data = buffer(1:N) - - ! Clean buffer - buffer(1:N) = ZERO - - end subroutine reduceArray -#endif - - !! - !! Load mean result and Standard deviation into provided arguments - !! Load from bin indicated by idx - !! Returns 0 if index is invalid - !! - elemental subroutine getResult_withSTD(self, mean, STD, idx, samples) - class(scoreMemory), intent(in) :: self - real(defReal), intent(out) :: mean - real(defReal),intent(out) :: STD - integer(longInt), intent(in) :: idx - integer(shortInt), intent(in), optional :: samples - integer(shortInt) :: N - real(defReal) :: inv_N, inv_Nm1 - - !! Verify index. Return 0 if not present - if (idx < 0_longInt .or. idx > self % N) then - mean = ZERO - STD = ZERO - return - end if - - ! Check if # of samples is provided - if (present(samples)) then - N = samples - else - N = self % batchN - end if - - ! Calculate mean - mean = self % bins(idx, CSUM) / N - - ! 
Calculate STD
-    inv_N = ONE / N
-    if (N /= 1) then
-      inv_Nm1 = ONE / (N - 1)
-    else
-      inv_Nm1 = ONE
-    end if
-    STD = self % bins(idx, CSUM2) *inv_N * inv_Nm1 - mean * mean * inv_Nm1
-    STD = sqrt(STD)
-
-  end subroutine getResult_withSTD
-
-  !!
-  !! Load mean result provided argument
-  !! Load from bin indicated by idx
-  !! Returns 0 if index is invalid
-  !!
-  elemental subroutine getResult_withoutSTD(self, mean, idx, samples)
-    class(scoreMemory), intent(in) :: self
-    real(defReal), intent(out) :: mean
-    integer(longInt), intent(in) :: idx
-    integer(shortInt), intent(in), optional :: samples
-    integer(shortInt) :: N
-
-    !! Verify index. Return 0 if not present
-    if (idx < 0_longInt .or. idx > self % N) then
-      mean = ZERO
-      return
-    end if
-
-    ! Check if # of samples is provided
-    if( present(samples)) then
-      N = samples
-    else
-      N = self % batchN
-    end if
-
-    ! Calculate mean
-    mean = self % bins(idx, CSUM) / N
-
-  end subroutine getResult_withoutSTD
-
-  !!
-  !! Obtain value of a score in a bin
-  !! Return ZERO for invalid bin address (idx)
-  !!
-  elemental function getScore(self, idx) result (score)
-    class(scoreMemory), intent(in) :: self
-    integer(longInt), intent(in) :: idx
-    real(defReal) :: score
-
-    if (idx <= 0_longInt .or. idx > self % N) then
-      score = ZERO
-    else
-      score = self % bins(idx, BIN)
-    end if
-
-  end function getScore
-
-end module scoreMemory_class
+module scoreMemory_class
+
+  use numPrecision
+#ifdef MPI
+  use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DOUBLE, MPI_COMM_WORLD, &
+                       MASTER_RANK, isMPIMaster, MPI_INT
+#endif
+  use universalVariables, only : array_pad
+  use genericProcedures, only : fatalError, numToChar
+  use openmp_func, only : ompGetMaxThreads, ompGetThreadNum
+
+  implicit none
+  private
+
+  !! Parameters for indexes of per cycle SCORE, Cumulative Sum and Cumulative Sum of squares
+  integer(shortInt), parameter :: BIN = 1, &
+                                  CSUM = 2, &
+                                  CSUM2 = 3
+
+  !! Size of the 2nd Dimension of bins
+  integer(shortInt), parameter :: DIM2 = 3
+
+
+  !!
+  !! scoreMemory is a class that stores space for scores from tallies.
+  !! It is separate from tallyClerks and individual responses to allow:
+  !!   -> Easy writing and (later) reading from file for archiving of results
+  !!   -> Easy possibility of extension to tally higher moments of result
+  !!   -> Possibility of extension to tally covariance of selected tally bins
+  !!   -> Easy copying and recombination of results for OpenMP shared memory parallelism
+  !!   -> Easy, output format-independent way to perform regression tests
+  !!   -> Easy handling of different batch sizes
+  !!
+  !! For every bin index there are two positions: CSUM, CSUM2. All are initialised to 0.
+  !! For scoring, an array is created with dimension (Nbins,nThreads) to mitigate false sharing.
+  !! On accumulation, this array adds to the normal bin array.
+  !!
+  !! Interface:
+  !!     init(N, id, batchSize, reduced): Initialise with integer size N and integer id.
+  !!         Optional integer batch size and logical `reduced` flag.
+  !!
+  !!     kill(): Elemental. Return to uninitialised state.
+  !!
+  !!     score(score,idx): Score in the bin under idx. FatalError if idx is outside bounds. Score
+  !!         is defReal, shortInt or longInt
+  !!
+  !!     accumulate(score,idx): Accumulate result in cumulative sums in bin under idx. FatalError
+  !!         if idx is outside bounds. Score is defReal, shortInt or longInt.
+  !!
+  !!     getResult(mean, STD, idx, samples): Retrieve mean value and standard deviation of the
+  !!         estimate under idx. Use optional samples to specify number of estimates used to
+  !!         evaluate mean and STD from default, which is number of batches in score memory.
+  !!         STD is optional.
+  !!
+  !!     getScore(idx): Return current value of score from bin under idx. Returns ZERO if idx is
+  !!         outside bounds.
+  !!
+  !!     closeBin(normFactor,idx): Multiplies score under bin by normFactor and accumulates it in
+  !!         cumulative sums. Then sets the bin to zero.
+  !!
+  !!     closeCycle(normFactor): Multiplies all scores by normFactor and accumulates them in
+  !!         cumulative sums. Sets all scores to zero.
+  !!
+  !!     lastCycle(): Return true if the next call to closeCycle will close a batch.
+  !!
+  !!     getBatchSize(): Returns number of cycles that constitute a single batch.
+  !!
+  !!     reduceBins(): Move the scores from parallelBins and different processes to bins.
+  !!
+  !!     collectDistributed(): Collect the accumulated results from different MPI processes
+  !!         on the master process.
+  !!
+  !! Example use case:
+  !!
+  !!  do batches=1,20
+  !!    do hist=1,10
+  !!      call scoreMem % score(hist,1)      ! Score hist (1,10) in bin 1
+  !!      call scoreMem % accumulate(hist,2) ! Accumulate hist in CSUMs of bin 2
+  !!    end do
+  !!    call scoreMem % closeCycle(ONE)      ! Close batch without normalisation (factor = ONE)
+  !!  end do
+  !!
+  !!  call scoreMem % getResult(mean,STD,1) ! Get result from bin 1 with STD
+  !!  call scoreMem % getResult(mean,2,200) ! Get mean from bin 2 assuming 200 samples
+  !!
+  !! NOTE:  Following indexing is used in bins class member
+  !!        bins(binIndex,binType) binType is CSUM/CSUM2
+  !! NOTE2: If the batch size does not divide the number of cycles scored, results accumulated
+  !!        in the extra cycles are discarded in the current implementation
+  !!
+  type, public :: scoreMemory
+    !private
+    real(defReal),dimension(:,:),allocatable :: bins         !! Space for storing cumul data (2nd dim size is always 3!)
+    real(defReal),dimension(:,:),allocatable :: parallelBins !! Space for scoring for different threads
+    integer(longInt)  :: N = 0             !! Size of memory (number of bins)
+    integer(shortInt) :: nThreads = 0      !! Number of threads used for parallelBins
+    integer(shortInt) :: id                !! Id of the tally
+    integer(shortInt) :: batchN = 0        !! Number of Batches
+    integer(shortInt) :: cycles = 0        !! Cycles counter
+    integer(shortInt) :: batchSize = 1     !! Batch interval size (in cycles)
+    logical(defBool)  :: reduced = .false. !! True if bins have been reduced
+  contains
+    ! Interface procedures
+    procedure :: init
+    procedure :: kill
+    generic :: score => score_defReal, score_shortInt, score_longInt
+    generic :: accumulate => accumulate_defReal, accumulate_shortInt, accumulate_longInt
+    generic :: getResult => getResult_withSTD, getResult_withoutSTD
+    procedure :: getScore
+    procedure :: closeCycle
+    procedure :: closeBin
+    procedure :: lastCycle
+    procedure :: getBatchSize
+    procedure :: reduceBins
+    procedure :: collectDistributed
+
+    ! Private procedures
+    procedure, private :: score_defReal
+    procedure, private :: score_shortInt
+    procedure, private :: score_longInt
+    procedure, private :: accumulate_defReal
+    procedure, private :: accumulate_shortInt
+    procedure, private :: accumulate_longInt
+    procedure, private :: getResult_withSTD
+    procedure, private :: getResult_withoutSTD
+
+  end type scoreMemory
+
+contains
+
+  !!
+  !! Allocate space for the bins given number of bins N
+  !! Optionally change batchSize from 1 to any positive number
+  !!
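+  !! The optional logical `reduced` selects the MPI behaviour: if true, scores are
+  !! combined across processes at the end of every batch (see reduceBins); if false,
+  !! each process accumulates independent statistics, which are merged only by
+  !! collectDistributed.
+  !!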
+ subroutine init(self, N, id, batchSize, reduced) + class(scoreMemory),intent(inout) :: self + integer(longInt),intent(in) :: N + integer(shortInt),intent(in) :: id + integer(shortInt),optional,intent(in) :: batchSize + logical(defBool),optional,intent(in) :: reduced + character(100), parameter :: Here= 'init (scoreMemory_class.f90)' + + ! Allocate space and zero all bins + allocate(self % bins(N, DIM2)) + self % bins = ZERO + + self % nThreads = ompGetMaxThreads() + + ! Note the array padding to avoid false sharing + allocate(self % parallelBins(N + array_pad, self % nThreads)) + self % parallelBins = ZERO + + ! Save size of memory + self % N = N + + ! Assign memory id + self % id = id + + ! Set batchN, cycles and batchSize to default values + self % batchN = 0 + self % cycles = 0 + self % batchSize = 1 + + if (present(batchSize)) then + if (batchSize > 0) then + self % batchSize = batchSize + else + call fatalError(Here,'Batch Size of: '// numToChar(batchSize) //' is invalid') + end if + end if + + if (present(reduced)) then + self % reduced = reduced + end if + + end subroutine init + + !! + !! Deallocate memory and return to uninitialised state + !! + subroutine kill(self) + class(scoreMemory), intent(inout) :: self + + if(allocated(self % bins)) deallocate(self % bins) + if(allocated(self % parallelBins)) deallocate(self % parallelBins) + self % N = 0 + self % nThreads = 0 + self % batchN = 0 + + end subroutine kill + + !! + !! Score a result on a given single bin under idx + !! + subroutine score_defReal(self, score, idx) + class(scoreMemory), intent(inout) :: self + real(defReal), intent(in) :: score + integer(longInt), intent(in) :: idx + integer(shortInt) :: thread_idx + character(100),parameter :: Here = 'score_defReal (scoreMemory_class.f90)' + + ! Verify bounds for the index + if( idx < 0_longInt .or. idx > self % N) then + call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & + & memory with size '//numToChar(self % N)) + end if + + ! Add the score + thread_idx = ompGetThreadNum() + 1 + self % parallelBins(idx, thread_idx) = & + self % parallelBins(idx, thread_idx) + score + + end subroutine score_defReal + + !! + !! Score a result with shortInt on a given bin under idx + !! + subroutine score_shortInt(self, score, idx) + class(scoreMemory), intent(inout) :: self + integer(shortInt), intent(in) :: score + integer(longInt), intent(in) :: idx + + call self % score_defReal(real(score, defReal), idx) + + end subroutine score_shortInt + + !! + !! Score a result with longInt on a given bin under idx + !! + subroutine score_longInt(self, score, idx) + class(scoreMemory), intent(inout) :: self + integer(longInt), intent(in) :: score + integer(longInt), intent(in) :: idx + + call self % score_defReal(real(score, defReal), idx) + + end subroutine score_longInt + + !! + !! Increment the result directly on cumulative sums + !! + subroutine accumulate_defReal(self, score, idx) + class(scoreMemory), intent(inout) :: self + real(defReal), intent(in) :: score + integer(longInt), intent(in) :: idx + character(100),parameter :: Here = 'accumulate_defReal (scoreMemory_class.f90)' + + ! Verify bounds for the index + if( idx < 0_longInt .or. idx > self % N) then + call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of & + & memory with size '//numToChar(self % N)) + end if + + ! Add the score + self % bins(idx, CSUM) = self % bins(idx, CSUM) + score + self % bins(idx, CSUM2) = self % bins(idx, CSUM2) + score * score + + end subroutine accumulate_defReal + + !! 
+  !! Increment the result directly on cumulative sums with shortInt score
+  !!
+  subroutine accumulate_shortInt(self, score, idx)
+    class(scoreMemory), intent(inout) :: self
+    integer(shortInt), intent(in) :: score
+    integer(longInt), intent(in) :: idx
+
+    call self % accumulate_defReal(real(score, defReal), idx)
+
+  end subroutine accumulate_shortInt
+
+  !!
+  !! Increment the result directly on cumulative sums with longInt score
+  !!
+  subroutine accumulate_longInt(self, score, idx)
+    class(scoreMemory), intent(inout) :: self
+    integer(longInt), intent(in) :: score
+    integer(longInt), intent(in) :: idx
+
+    call self % accumulate_defReal(real(score, defReal), idx)
+
+  end subroutine accumulate_longInt
+
+  !!
+  !! Close Cycle
+  !! Increments cycle counter and detects end-of-batch
+  !! When batch finishes it normalises all scores by the factor and moves them to CSUMs
+  !!
+  subroutine closeCycle(self, normFactor)
+    class(scoreMemory), intent(inout) :: self
+    real(defReal),intent(in) :: normFactor
+    integer(longInt) :: i
+    real(defReal), save :: res
+    !$omp threadprivate(res)
+
+    ! Increment Cycle Counter
+    self % cycles = self % cycles + 1
+
+    if (mod(self % cycles, self % batchSize) == 0) then ! Close Batch
+
+      !$omp parallel do
+      do i = 1, self % N
+
+        ! Normalise scores
+        res = self % bins(i, BIN) * normFactor
+
+        ! Zero all score bins
+        self % bins(i, BIN) = ZERO
+
+        ! Increment cumulative sums
+        self % bins(i,CSUM)  = self % bins(i,CSUM) + res
+        self % bins(i,CSUM2) = self % bins(i,CSUM2) + res * res
+
+      end do
+      !$omp end parallel do
+
+      ! Increment batch counter
+      self % batchN = self % batchN + 1
+
+    end if
+
+  end subroutine closeCycle
+
+  !!
+  !! Close Bin
+  !! Multiplies score in bin under idx by normFactor, accumulates it and sets it to zero
+  !!
+  subroutine closeBin(self, normFactor, idx)
+    class(scoreMemory), intent(inout) :: self
+    real(defReal),intent(in) :: normFactor
+    integer(longInt), intent(in) :: idx
+    real(defReal) :: res
+    character(100),parameter :: Here = 'closeBin (scoreMemory_class.f90)'
+
+    ! Verify bounds for the index
+    if (idx < 0_longInt .or. idx > self % N) then
+      call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of &
+                            & memory with size '//numToChar(self % N))
+    end if
+
+    ! Normalise score
+    res = self % bins(idx, BIN) * normFactor
+
+    ! Increment cumulative sum
+    self % bins(idx,CSUM)  = self % bins(idx,CSUM) + res
+    self % bins(idx,CSUM2) = self % bins(idx,CSUM2) + res * res
+
+    ! Zero the score
+    self % bins(idx, BIN) = ZERO
+
+  end subroutine closeBin
+
+  !!
+  !! Return true if next closeCycle will close a batch
+  !!
+  function lastCycle(self) result(isIt)
+    class(scoreMemory), intent(in) :: self
+    logical(defBool) :: isIt
+
+    isIt = mod(self % cycles + 1, self % batchSize) == 0
+
+  end function lastCycle
+
+  !!
+  !! Return batchSize
+  !!
+  pure function getBatchSize(self) result(S)
+    class(scoreMemory), intent(in) :: self
+    integer(shortInt) :: S
+
+    S = self % batchSize
+
+  end function getBatchSize
+
+  !!
+  !! Combine the bins across threads and processes
+  !!
+  !! NOTE:
+  !!   Needs to be called before reporting CycleEnd to the clerks or calling closeCycle.
+  !!   Otherwise the results will be incorrect. This is not ideal design and should
+  !!   probably be improved in the future.
+  !!
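+  !! A minimal sketch of the ordering this NOTE implies (hypothetical driver code,
+  !! not taken verbatim from any physics package):
+  !!
+  !!   call mem % reduceBins()             ! combine scores from threads/processes
+  !!   <report cycleEnd to the clerks>     ! clerks can read the combined scores
+  !!   call mem % closeCycle(normFactor)   ! accumulate into CSUMs and zero scores
+  !!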
+  subroutine reduceBins(self)
+    class(scoreMemory), intent(inout) :: self
+    integer(longInt) :: i
+    character(100),parameter :: Here = 'reduceBins (scoreMemory_class.f90)'
+
+    if (self % lastCycle()) then
+
+      !$omp parallel do
+      do i = 1, self % N
+        self % bins(i, BIN) = sum(self % parallelBins(i,:))
+        self % parallelBins(i,:) = ZERO
+      end do
+      !$omp end parallel do
+
+      ! Reduce across processes
+      ! We use the parallelBins array as a temporary storage
+#ifdef MPI
+      ! Since the number of bins is limited by 64bit signed integer and the
+      ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need
+      ! to split the reduction operation into chunks
+      if (self % reduced) then
+        call reduceArray(self % bins(:,BIN), self % parallelBins(:,1))
+      end if
+#endif
+
+    end if
+
+  end subroutine reduceBins
+
+  !!
+  !! Reduce the accumulated results (csum and csum2) from different MPI processes
+  !!
+  !! If the bins are `reduced` (that is, scores are collected at the end of each cycle),
+  !! then this subroutine does nothing. Otherwise it collects the results from different
+  !! processes and stores them in the master process.
+  !!
+  !! The estimates from each process are treated as independent simulations, thus the
+  !! cumulative sums are added together and the batch count is summed.
+  !!
+  subroutine collectDistributed(self)
+    class(scoreMemory), intent(inout) :: self
+#ifdef MPI
+    integer(shortInt) :: error, buffer
+
+    if (.not. self % reduced) then
+      ! Reduce the batch count
+      ! Note we need to use size 1 arrays to fit the interface of mpi_reduce, which expects
+      ! to be given arrays
+      call mpi_reduce(self % batchN, buffer, 1, MPI_INT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
+      if (isMPIMaster()) then
+        self % batchN = buffer
+      else
+        self % batchN = 0
+      end if
+
+      ! Reduce the cumulative sums
+      call reduceArray(self % bins(:,CSUM), self % parallelBins(:,1))
+
+      ! Reduce the cumulative sums of squares
+      call reduceArray(self % bins(:,CSUM2), self % parallelBins(:,1))
+    end if
+
+#endif
+  end subroutine collectDistributed
+
+  !!
+  !! Reduce the array across different processes
+  !!
+  !! Wrapper around MPI_Reduce to support arrays of defReal larger than 2^31
+  !! This procedure is only defined if MPI is enabled
+  !!
+  !! Args:
+  !!   data [inout] -> Array with the data to be reduced
+  !!   buffer [inout] -> Buffer to store the reduced data (must be same size or larger than data)
+  !!
+  !! Result:
+  !!   The sum of the data across all processes is stored in `data` on the master process
+  !!   The buffer is set to ZERO on all processes (only 1:size(data) range)!
+  !!
+  !! Errors:
+  !!   fatalError if size of the buffer is insufficient
+  !!
+#ifdef MPI
+  subroutine reduceArray(data, buffer)
+    real(defReal), dimension(:), intent(inout) :: data
+    real(defReal), dimension(:), intent(inout) :: buffer
+    integer(longInt) :: N, chunk, start
+    integer(shortInt) :: error
+    character(100),parameter :: Here = 'reduceArray (scoreMemory_class.f90)'
+
+    ! We need to be careful to support sizes larger than 2^31
+    N = size(data, kind = longInt)
+
+    ! Check if the buffer is large enough
+    if (size(buffer, kind = longInt) < N) then
+      call fatalError(Here, 'Buffer is too small to store the reduced data')
+    end if
+
+    ! Since the number of bins is limited by 64bit signed integer and the
+    ! maximum `count` in mpi_reduce call is 32bit signed integer, we may need
+    ! to split the reduction operation into chunks
+    start = 1
+
+    do while (start <= N)
+
+      chunk = min(N - start + 1, int(huge(1_shortInt), longInt))
+      call mpi_reduce(data(start : start + chunk - 1), &
+                      buffer(start : start + chunk - 1), &
+                      int(chunk, shortInt), &
+                      MPI_DOUBLE, &
+                      MPI_SUM, &
+                      MASTER_RANK, &
+                      MPI_COMM_WORLD, &
+                      error)
+
+      start = start + chunk
+
+    end do
+
+    ! Copy the result back to data
+    data = buffer(1:N)
+
+    ! Clean buffer
+    buffer(1:N) = ZERO
+
+  end subroutine reduceArray
+#endif
+
+  !!
+  !! Load mean result and Standard deviation into provided arguments
+  !! Load from bin indicated by idx
+  !! Returns 0 if index is invalid
+  !!
+  elemental subroutine getResult_withSTD(self, mean, STD, idx, samples)
+    class(scoreMemory), intent(in) :: self
+    real(defReal), intent(out) :: mean
+    real(defReal),intent(out) :: STD
+    integer(longInt), intent(in) :: idx
+    integer(shortInt), intent(in), optional :: samples
+    integer(shortInt) :: N
+    real(defReal) :: inv_N, inv_Nm1
+
+    !! Verify index. Return 0 if not present
+    if (idx < 0_longInt .or. idx > self % N) then
+      mean = ZERO
+      STD = ZERO
+      return
+    end if
+
+    ! Check if # of samples is provided
+    if (present(samples)) then
+      N = samples
+    else
+      N = self % batchN
+    end if
+
+    ! Calculate mean
+    mean = self % bins(idx, CSUM) / N
+
+    ! Calculate STD
+    inv_N = ONE / N
+    if (N /= 1) then
+      inv_Nm1 = ONE / (N - 1)
+    else
+      inv_Nm1 = ONE
+    end if
+    STD = self % bins(idx, CSUM2) * inv_N * inv_Nm1 - mean * mean * inv_Nm1
+    STD = sqrt(STD)
+
+  end subroutine getResult_withSTD
+
+  !!
+  !! Load mean result into provided argument
+  !! Load from bin indicated by idx
+  !! Returns 0 if index is invalid
+  !!
+  elemental subroutine getResult_withoutSTD(self, mean, idx, samples)
+    class(scoreMemory), intent(in) :: self
+    real(defReal), intent(out) :: mean
+    integer(longInt), intent(in) :: idx
+    integer(shortInt), intent(in), optional :: samples
+    integer(shortInt) :: N
+
+    !! Verify index. Return 0 if not present
+    if (idx < 0_longInt .or. idx > self % N) then
+      mean = ZERO
+      return
+    end if
+
+    ! Check if # of samples is provided
+    if (present(samples)) then
+      N = samples
+    else
+      N = self % batchN
+    end if
+
+    ! Calculate mean
+    mean = self % bins(idx, CSUM) / N
+
+  end subroutine getResult_withoutSTD
+
+  !!
+  !! Obtain value of a score in a bin
+  !! Return ZERO for invalid bin address (idx)
+  !!
+  elemental function getScore(self, idx) result (score)
+    class(scoreMemory), intent(in) :: self
+    integer(longInt), intent(in) :: idx
+    real(defReal) :: score
+
+    if (idx <= 0_longInt .or. idx > self % N) then
+      score = ZERO
+    else
+      score = self % bins(idx, BIN)
+    end if
+
+  end function getScore
+
+end module scoreMemory_class
diff --git a/docs/User Manual.rst b/docs/Input Manual.rst
similarity index 99%
rename from docs/User Manual.rst
rename to docs/Input Manual.rst
index 225792db1..a8c1267cc 100644
--- a/docs/User Manual.rst
+++ b/docs/Input Manual.rst
@@ -1,7 +1,7 @@
-.. _user-manual:
+.. _input-manual:
 
-User Manual
-===========
+Input Manual
+============
 
 Generic information about how to use dictionaries in writing an input file can be found in
 :ref:`Dictionary Input `. Here, more specific information about the input
diff --git a/docs/Installation.rst b/docs/Installation.rst
index 5eed6dbd8..98873e7a0 100644
--- a/docs/Installation.rst
+++ b/docs/Installation.rst
@@ -32,6 +32,9 @@ Requirements
    python interpreter. NOTE that version 4 (contrarily to the older 3.0) requires
    the use of gfortran version 8.3 or newer.
+   MPI and OpenMP for parallelism
+
+
 Getting gfortran
 ''''''''''''''''
 To verify that you have gfortran available by typing::
@@ -100,7 +103,6 @@ version of CMake. If you don't you can follow the instructions.
 
      export PATH=/cmake/install/folder/bin:$PATH
 
-
 Installing pFUnit
 '''''''''''''''''
 This is only required if the unit tests are to be build.
@@ -114,11 +116,23 @@ This is only required if the unit tests are to be build.
      git clone https://github.com/Goddard-Fortran-Ecosystem/pFUnit.git
      cd pFUnit
 
-#. Create a build folder and compile the code::
+#. Create a build folder::
 
      mkdir build
      cd build
-     cmake ./..
+
+#. Compile the code. Note that pFUnit 4 compiles with MPI support on by default,
+   so MPI needs to be available. More specifically, for compatibility with SCONE,
+   mpi_f08 needs to be available::
+
+     cmake -DENABLE_MPI_F08=YES ./..
+
+Alternatively, if one doesn't have access to mpi_f08 and wants to compile SCONE without MPI support::
+
+     cmake -DSKIP_MPI=YES ./..
+
+#. Run make and install the application::
+
      make tests
     make install
 
@@ -177,26 +191,27 @@ Compiling SCONE
 
      git clone https://github.com/CambridgeNuclear/SCONE
 
-#. Create build folder in the project directory (e.g. Build)::
+#. Create build folder in the project directory (e.g. build)::
 
      cd ./scone
-     mkdir Build
+     mkdir build
 
 #. Generate makefile with CMake and compile::
 
-     cmake -E chdir ./Build cmake ./..
-     make -C Build
+     cmake -E chdir ./build cmake ./..
+     make -C build
 
 #. To switch off compilation of tests use the following commands::
 
-     cmake -E chdir ./Build cmake ./.. -DBUILD_TESTS=OFF
-     make -C Build
+     cmake -E chdir ./build cmake ./.. -DBUILD_TESTS=OFF
+     make -C build
 
 #. Note that you can use ccmake utility to modify avalible options and regenerate your
    make file just type the following into your terminal and follow the instructions::
 
-     ccmake ./Build
+     ccmake ./build
+
 
 .. admonition:: CMake options
 
@@ -224,21 +239,43 @@ Compiling SCONE
 
        cmake -DDEBUG=ON
 
+   OPENMP
+     Sets up OpenMP to allow for shared memory parallelism. It is ``ON`` by default. To disable::
+
+       cmake -DOPENMP=OFF
+
+   MPI
+     Links SCONE with MPI to allow for multi-process parallelism. It is ``ON`` by default. To disable::
+
+       cmake -DMPI=OFF
+
+.. note:: Suppress message during MPI runs
+
+   When running SCONE with MPI the message 'No protocol specified' might appear. This was already
+   documented and reported in::
+
+     https://github.com/open-mpi/ompi/issues/7701
+
+   Since there doesn't seem to be an established solution to this, one can suppress the message for
+   convenience by adding to the ``.bashrc`` file the line::
+
+     export HWLOC_COMPONENTS=-gl
+
 Run Tests to Verify
 '''''''''''''''''''
 
 If you compiled SCONE with tests enabled (you should by the way) you can now
 verify that it works correctly by running the automated test suites. You
-**must** execute the following commands from ``scone`` directory. Some
+**must** execute the following commands from the ``scone`` directory. Some
 integration tests use files in ``IntegrationTestFiles`` and have hard-coded
 relative paths. **Integration tests may fail if they are run from other
 directory**. Run::
 
-  ./Build/unitTests
-  ./Build/integrationTests
+  ./build/unitTests
+  ./build/integrationTests
 
-This assume that ``Build`` is the build directory. If the tests were successful
+This assumes that ``build`` is the build directory. If the tests were successful
 that is great. If some of them failed it is troubling. Please open an Issue in
 the online repository so we can try to resolve what is the problem.
Provide at least the following information:
diff --git a/docs/Running.rst b/docs/Running.rst
new file mode 100644
index 000000000..8913299dd
--- /dev/null
+++ b/docs/Running.rst
@@ -0,0 +1,37 @@
+.. _running:
+
+Running
+=======
+
+Note that instructions on how to write an input file can be found
+in the :ref:`input-manual`.
+
+Running SCONE
+'''''''''''''
+
+After installation of all the dependencies and the compilation of SCONE,
+one can finally run the code. The executable ``scone.out`` is in the ``build``
+folder; one can run it from any folder by specifying the path to the executable
+and to the input file::
+
+  <path-to-build>/scone.out <input-file>
+
+.. admonition:: Options
+
+   OpenMP
+     Specifies the number ``<N>`` of OpenMP threads to be used::
+
+       --omp <N>
+
+   Geometry plotting
+     Allows plotting the geometry without running the actual calculations::
+
+       --plot
+
+   MPI
+     To run with multiple processes, one needs to run using ``mpirun`` and
+     specify the number ``<N>`` of processes::
+
+       <path-to-mpi>/mpirun -np <N> <path-to-build>/scone.out <input-file>
+
+
diff --git a/docs/Tutorials/Tutorial_1.rst b/docs/Tutorials/Tutorial_1.rst
index 66c9646e4..bd6ab4812 100644
--- a/docs/Tutorials/Tutorial_1.rst
+++ b/docs/Tutorials/Tutorial_1.rst
@@ -12,7 +12,7 @@ In this tutorial you will learn:
 This tutorial assumes that you use a UNIX-like environment. Some knowledge of basic
 UNIX command line commands is essential.
 
-Creating new Git Branch
------------------------
+Creating a new Git Branch
+-------------------------
 
 #. We assume that you have already cloned SCONE on to your machine. Information on how to
    do that is available in (:ref:`installation`)
@@ -52,7 +52,7 @@ Creating new Git Branch
       missingENDFLaws
       newNuclearData
 
-Creating new Fortran module
----------------------------
+Creating a new Fortran module
+-----------------------------
 
 #. Now that we have created space where we can easily separate our modifications
From 6a1eb86d2c91d320a0d12b92489a6e690a3de6d8 Mon Sep 17 00:00:00 2001
From: "V. Raffuzzi" 
Date: Fri, 15 Nov 2024 18:38:39 +0000
Subject: [PATCH 26/27] Making tallies reproducible with mpi synchronisation

---
 ParticleObjects/particleDungeon_class.f90     |   3 +-
 PhysicsPackages/eigenPhysicsPackage_class.f90 |   1 +
 .../fixedSourcePhysicsPackage_class.f90       |   1 +
 .../Tests/keffAnalogClerk_test.f90            |   8 +-
 .../Tests/shannonEntropyClerk_test.f90        |   4 +-
 .../TallyClerks/centreOfMassClerk_class.f90   |  50 ++++++--
 .../collisionProbabilityClerk_class.f90       |   1 +
 .../TallyClerks/shannonEntropyClerk_class.f90 | 114 ++++++++----------
 Tallies/TallyClerks/simpleFMClerk_class.f90   |  12 +-
 Tallies/scoreMemory_class.f90                 |  24 ++++
 Tallies/tallyAdmin_class.f90                  |   2 +-
 11 files changed, 125 insertions(+), 95 deletions(-)

diff --git a/ParticleObjects/particleDungeon_class.f90 b/ParticleObjects/particleDungeon_class.f90
index e14c0babf..364ba1acd 100644
--- a/ParticleObjects/particleDungeon_class.f90
+++ b/ParticleObjects/particleDungeon_class.f90
@@ -21,7 +21,7 @@ module particleDungeon_class
   !!
   !! particleDungeon stores particle phase-space
   !! Used in eigenvalue calculation to store fission sites generated in a cycle
-  !! Similar structures are refered to as:
+  !! Similar structures are referred to as:
   !!  Store:        MONK and Serpent(?)
   !!  Fission Bank: OpenMC and MCNP(?)
   !!
@@ -899,7 +899,6 @@ subroutine sortByBroodID(self, k)
 
   end subroutine sortByBroodID
 
-
   !!
   !! Kill or particles in the dungeon
   !!
diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index 20ce94ce7..aef203e8a 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -225,6 +225,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) ! Save state call neutron % savePreHistory() + call neutron % savePreCollision() ! Transport particle until its death history: do diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index d7966d3a2..b3905d6d3 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -208,6 +208,7 @@ subroutine cycles(self, tally, N_cycles) ! Save state call p % savePreHistory() + call p % savePreCollision() ! Transport particle until its death history: do diff --git a/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 b/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 index 37dd25722..3ab365fdd 100644 --- a/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/keffAnalogClerk_test.f90 @@ -186,8 +186,8 @@ subroutine test2CycleBatch(this) call pit % detain(p) pit % k_eff = 1.2_defReal - call this % clerk % reportCycleEnd(pit,mem) - call this % clerk % closeCycle(pit,mem) + call this % clerk % reportCycleEnd(pit, mem) + call this % clerk % closeCycle(pit, mem) call mem % closeCycle(0.8_defReal) ! Start cycle 4 @@ -202,9 +202,9 @@ subroutine test2CycleBatch(this) call pit % detain(p) pit % k_eff = 1.2_defReal - call this % clerk % reportCycleEnd(pit,mem) + call this % clerk % reportCycleEnd(pit, mem) call mem % reduceBins() - call this % clerk % closeCycle(pit,mem) + call this % clerk % closeCycle(pit, mem) call mem % closeCycle(0.8_defReal) ! Validate results diff --git a/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 b/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 index da64d9889..6159312a0 100644 --- a/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 +++ b/Tallies/TallyClerks/Tests/shannonEntropyClerk_test.f90 @@ -103,7 +103,7 @@ subroutine testSimpleUseCase(this) call mem % closeCycle(ONE) ! Verify results for uniform distribution - idx = this % clerk % getMemAddress() + 2 + idx = this % clerk % getMemAddress() + 3 call mem % getResult(val, idx, samples = 1) @assertEqual(ONE, val, TOL) @@ -121,7 +121,7 @@ subroutine testSimpleUseCase(this) call mem % closeCycle(ONE) ! Verify results for all particles in one bin - idx = this % clerk % getMemAddress() + 3 + idx = this % clerk % getMemAddress() + 4 call mem % getResult(val, idx, samples = 1) @assertEqual(ZERO, val, TOL) diff --git a/Tallies/TallyClerks/centreOfMassClerk_class.f90 b/Tallies/TallyClerks/centreOfMassClerk_class.f90 index e246c8dbf..483cc23e4 100644 --- a/Tallies/TallyClerks/centreOfMassClerk_class.f90 +++ b/Tallies/TallyClerks/centreOfMassClerk_class.f90 @@ -2,11 +2,12 @@ module centreOfMassClerk_class use numPrecision use tallyCodes - use genericProcedures, only : fatalError + use genericProcedures, only : fatalError, numToChar use dictionary_class, only : dictionary use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon use outputFile_class, only : outputFile + use mpi_func, only : getMPIWorldSize ! 
Basic tally modules
   use scoreMemory_class, only : scoreMemory
 
@@ -64,6 +65,7 @@ subroutine init(self, dict, name)
     class(centreOfMassClerk), intent(inout) :: self
     class(dictionary), intent(in) :: dict
     character(nameLen), intent(in) :: name
+    character(100), parameter :: Here = 'init (centreOfMassClerk_class.f90)'
 
     ! Assign name
     call self % setName(name)
@@ -71,6 +73,11 @@ subroutine init(self, dict, name)
     ! Read number of cycles for which to track COM
     call dict % get(self % maxCycles, 'cycles')
 
+    ! Check on cycle number
+    if (self % maxCycles <= 0) then
+      call fatalError(Here, 'Number of cycles should be positive. It is: '//trim(numToChar(self % maxCycles)))
+    end if
+
   end subroutine init
 
   !!
@@ -91,7 +98,7 @@ elemental function getSize(self) result(S)
     class(centreOfMassClerk), intent(in) :: self
     integer(shortInt) :: S
 
-    S = 3 * self % maxCycles
+    S = 3 * self % maxCycles + 1
 
   end function getSize
 
@@ -106,16 +113,18 @@ subroutine reportCycleEnd(self, end, mem)
     integer(longInt) :: cc
     real(defReal), dimension(3) :: val
 
-    if ((self % currentCycle) < (self % maxCycles)) then
+    ! Increment cycle number
+    self % currentCycle = self % currentCycle + 1
+
+    if ((self % currentCycle) <= (self % maxCycles)) then
 
-      self % currentCycle = self % currentCycle + 1
       cc = self % currentCycle
 
       ! Loop through population, scoring probabilities
       do i = 1, end % popSize()
 
         associate(state => end % get(i))
-          val = state % wgt * state % r / end % popWeight()
+          val = state % wgt * state % r
           call mem % score(val(1), self % getMemAddress() + 3*(cc - 1))
           call mem % score(val(2), self % getMemAddress() + 3*(cc - 1) + 1)
           call mem % score(val(3), self % getMemAddress() + 3*(cc - 1) + 2)
@@ -123,6 +132,9 @@ subroutine reportCycleEnd(self, end, mem)
 
       end do
 
+      ! Score the population weight, needed to normalise results with MPI
+      call mem % score(end % popWeight(), self % getMemAddress() + 3*self % maxCycles)
+
     end if
 
   end subroutine reportCycleEnd
@@ -134,16 +146,21 @@ subroutine closeCycle(self, end, mem)
     class(centreOfMassClerk), intent(inout) :: self
     class(particleDungeon), intent(in) :: end
     type(scoreMemory), intent(inout) :: mem
+    real(defReal) :: norm
     integer(longInt) :: cc
 
-    if ((self % currentCycle) < (self % maxCycles)) then
+    if ((self % currentCycle) <= (self % maxCycles)) then
+
+      ! Retrieve population weight and use it to normalise scores
+      norm = mem % getScore(self % getMemAddress() + 3*self % maxCycles)
+      norm = ONE / norm
 
       cc = self % currentCycle
 
       ! Make sure results don't get normalised arbitrarily
-      call mem % closeBin(ONE, self % getMemAddress() + 3*(cc - 1))
-      call mem % closeBin(ONE, self % getMemAddress() + 3*(cc - 1) + 1)
-      call mem % closeBin(ONE, self % getMemAddress() + 3*(cc - 1) + 2)
+      call mem % closeBin(norm, self % getMemAddress() + 3*(cc - 1))
+      call mem % closeBin(norm, self % getMemAddress() + 3*(cc - 1) + 1)
+      call mem % closeBin(norm, self % getMemAddress() + 3*(cc - 1) + 2)
 
     end if
 
@@ -167,11 +184,18 @@ subroutine print(self, outFile, mem)
     class(centreOfMassClerk), intent(in) :: self
     class(outputFile), intent(inout) :: outFile
     type(scoreMemory), intent(in) :: mem
-    integer(shortInt) :: i
+    integer(shortInt) :: i, batches
     character(nameLen) :: name
     integer(longInt) :: ccIdx
     real(defReal) :: val
 
+    ! Determine number of batches for normalisation with MPI
+    if (mem % reduced) then
+      batches = 1
+    else
+      batches = getMPIWorldSize()
+    end if
+
+    ! 
Begin block call outFile % startBlock(self % getName()) @@ -180,7 +204,7 @@ subroutine print(self, outFile, mem) call outFile % startArray(name, [self % maxCycles]) do i = 1, self % maxCycles ccIdx = self % getMemAddress() + 3*(i - 1) - call mem % getResult(val, ccIdx, samples = 1) + call mem % getResult(val, ccIdx, samples = batches) call outFile % addValue(val) end do call outFile % endArray() @@ -189,7 +213,7 @@ subroutine print(self, outFile, mem) call outFile % startArray(name, [self % maxCycles]) do i = 1, self % maxCycles ccIdx = self % getMemAddress() + 3*(i - 1) + 1 - call mem % getResult(val, ccIdx, samples = 1) + call mem % getResult(val, ccIdx, samples = batches) call outFile % addValue(val) end do call outFile % endArray() @@ -198,7 +222,7 @@ subroutine print(self, outFile, mem) call outFile % startArray(name, [self % maxCycles]) do i = 1, self % maxCycles ccIdx = self % getMemAddress() + 3*(i - 1) + 2 - call mem % getResult(val, ccIdx, samples = 1) + call mem % getResult(val, ccIdx, samples = batches) call outFile % addValue(val) end do call outFile % endArray() diff --git a/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 b/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 index c9debd62a..d5fbc2494 100644 --- a/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 +++ b/Tallies/TallyClerks/collisionProbabilityClerk_class.f90 @@ -196,6 +196,7 @@ subroutine reportInColl(self, p, xsData, mem, virtual) state = p cIdx = self % map % map(state) + ! Invalid indices are allowed given that CPs must sum to one - this will include ! neutrons which collide outside the mapped region of phase space ! These correspond to index = 0 diff --git a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 index 1315d938e..e0f8ff07a 100644 --- a/Tallies/TallyClerks/shannonEntropyClerk_class.f90 +++ b/Tallies/TallyClerks/shannonEntropyClerk_class.f90 @@ -8,6 +8,7 @@ module shannonEntropyClerk_class use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon use outputFile_class, only : outputFile + use mpi_func, only : getMPIWorldSize ! Basic tally modules use scoreMemory_class, only : scoreMemory @@ -17,11 +18,6 @@ module shannonEntropyClerk_class use tallyMap_inter, only : tallyMap use tallyMapFactory_func, only : new_tallyMap - use mpi_func, only : isMPIMaster -#ifdef MPI - use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DOUBLE, MPI_COMM_WORLD, MASTER_RANK -#endif - implicit none private @@ -34,10 +30,6 @@ module shannonEntropyClerk_class !! !! Scores Shannon entropy for a user-specified number of cycles !! - !! NOTE: when using MPI with multiple processes, the scores from all processes are - !! collected (brute-force) in the master process. Only the master process - !! results are correct and accessible - !! !! Sample dictionary input: !! !! clerkName { @@ -115,7 +107,7 @@ elemental function getSize(self) result(S) class(shannonEntropyClerk), intent(in) :: self integer(shortInt) :: S - S = self % N + self % maxCycles + S = self % N + 1 + self % maxCycles end function getSize @@ -127,47 +119,26 @@ subroutine reportCycleEnd(self, end, mem) class(particleDungeon), intent(in) :: end type(scoreMemory), intent(inout) :: mem integer(shortInt) :: i, idx - real(defReal), dimension(self % N) :: prob, bufferArray - real(defReal) :: totWgt, buffer -#ifdef MPI - integer(shortInt) :: error -#endif + ! 
Increment cycle number
     self % currentCycle = self % currentCycle + 1
 
     if (self % currentCycle <= self % maxCycles) then
 
-      prob = ZERO
+      ! Score the population weight, needed to normalise results with MPI
+      call mem % score(end % popWeight(), self % getMemAddress())
 
       ! Loop through population, scoring probabilities
       do i = 1, end % popSize()
 
         associate(state => end % get(i))
           idx = self % map % map(state)
-          if (idx > 0) prob(idx) = prob(idx) + state % wgt
+          if (idx == 0) cycle
+          call mem % score(state % wgt, self % getMemAddress() + idx)
         end associate
 
       end do
 
-      totWgt = end % popWeight()
-
-      buffer = totWgt
-      bufferArray = prob
-
-#ifdef MPI
-      call mpi_reduce(totWgt, buffer, 1, MPI_DOUBLE, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
-      call mpi_reduce(prob, bufferArray, self % N, MPI_DOUBLE, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
-#endif
-
-      if (isMPIMaster()) then
-
-        prob = bufferArray / buffer
-        do i = 1, self % N
-          call mem % score(prob(i), self % getMemAddress() - 1 + i)
-        end do
-
-      end if
-
     end if
 
   end subroutine reportCycleEnd
@@ -181,30 +152,36 @@ subroutine closeCycle(self, end, mem)
     type(scoreMemory), intent(inout) :: mem
     integer(shortInt) :: i
     integer(longInt) :: ccIdx
-    real(defReal) :: one_log2, prob, val
+    real(defReal) :: totWgt, one_log2, prob, val
 
-    if (isMPIMaster()) then
+    if (self % currentCycle <= self % maxCycles) then
 
-      if (self % currentCycle <= self % maxCycles) then
+      ! Get total population weight for this cycle
+      totWgt = mem % getScore(self % getMemAddress())
 
-        ccIdx = self % getMemAddress() + self % N - 1 + self % currentCycle
+      ! Initialise constants and counters
+      val = ZERO
+      one_log2 = ONE/log(TWO)
 
-        val = ZERO
-        one_log2 = ONE/log(TWO)
+      ! Loop through bins, summing entropy
+      do i = 1, self % N
 
-        ! Loop through bins, summing entropy
-        do i = 1, self % N
-          prob = mem % getScore(self % getMemAddress() - 1 + i)
+        prob = mem % getScore(self % getMemAddress() + i)
+        prob = prob / totWgt
 
-          if ((prob > ZERO) .and. (prob < ONE)) then
-            val = val - prob * log(prob) * one_log2
-          end if
+        if ((prob > ZERO) .and. (prob < ONE)) then
+          val = val - prob * log(prob) * one_log2
+        end if
 
-        end do
+      end do
 
-        call mem % accumulate(val, ccIdx)
+      ccIdx = self % getMemAddress() + self % N + self % currentCycle
+      call mem % accumulate(val, ccIdx)
 
-      end if
+      ! Reset memory bins in preparation for next cycle
+      do i = 0, self % N
+        call mem % resetBin(self % getMemAddress() + i)
+      end do
 
     end if
 
   end subroutine closeCycle
 
@@ -228,32 +205,35 @@ subroutine print(self, outFile, mem)
     class(shannonEntropyClerk), intent(in) :: self
     class(outputFile), intent(inout) :: outFile
     type(scoreMemory), intent(in) :: mem
-    integer(shortInt) :: i
+    integer(shortInt) :: i, batches
     character(nameLen) :: name
     integer(longInt) :: ccIdx
     real(defReal) :: val
 
-    if (isMPIMaster()) then
-
-      ! Begin block
-      call outFile % startBlock(self % getName())
+    ! Determine number of batches for normalisation with MPI
+    if (mem % reduced) then
+      batches = 1
+    else
+      batches = getMPIWorldSize()
+    end if
 
-      ! Print entropy
-      name = 'shannonEntropy'
+    ! Begin block
+    call outFile % startBlock(self % getName())
 
-      call outFile % startArray(name, [self % maxCycles])
+    ! 
Print entropy + name = 'shannonEntropy' - do i = 1, self % maxCycles - ccIdx = self % getMemAddress() + self % N - 1 + i - call mem % getResult(val, ccIdx, samples = 1) - call outFile % addValue(val) - end do + call outFile % startArray(name, [self % maxCycles]) - call outFile % endArray() + do i = 1, self % maxCycles + ccIdx = self % getMemAddress() + self % N + i + call mem % getResult(val, ccIdx, samples = batches) + call outFile % addValue(val) + end do - call outFile % endBlock() + call outFile % endArray() - end if + call outFile % endBlock() end subroutine print diff --git a/Tallies/TallyClerks/simpleFMClerk_class.f90 b/Tallies/TallyClerks/simpleFMClerk_class.f90 index 4f9852def..be8554794 100644 --- a/Tallies/TallyClerks/simpleFMClerk_class.f90 +++ b/Tallies/TallyClerks/simpleFMClerk_class.f90 @@ -177,8 +177,8 @@ subroutine reportCycleStart(self, start, mem) associate (state => start % get(i)) idx = self % map % map(state) - if (idx == 0) return - call mem % score(state % wgt, self % getMemAddress() - 1 + idx) + if (idx == 0) cycle + call mem % score(state % wgt, self % getMemAddress() + idx - 1) end associate @@ -243,7 +243,7 @@ subroutine reportInColl(self, p, xsData, mem, virtual) score = self % resp % get(p, xsData) * flux ! Score element of the matrix - ! Note that matrix memory location starts from memAddress + N + ! Note that the matrix memory location starts from memAddress + N addr = self % getMemAddress() + sIdx * self % N + cIdx - 1 call mem % score(score, addr) @@ -271,7 +271,7 @@ subroutine closeCycle(self, end, mem) do i = 1, self % N ! Calculate normalisation factor - normFactor = mem % getScore(self % getMemAddress() - 1 + i) + normFactor = mem % getScore(self % getMemAddress() + i - 1) if (normFactor /= ZERO) normFactor = ONE / normFactor do j = 1, self % N @@ -342,7 +342,7 @@ pure subroutine getResult(self, res, mem) ! Load entries addr = self % getMemAddress() + self % N - 1 - do i = 1,self % N + do i = 1, self % N do j = 1, self % N addr = addr + 1 call mem % getResult(val, STD, addr) @@ -394,7 +394,7 @@ subroutine print(self, outFile, mem) call outFile % startArray(name, [self % N, self % N]) - do i = 1,self % N * self % N + do i = 1, self % N * self % N addr = addr + 1 call mem % getResult(val, std, addr) call outFile % addResult(val, std) diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90 index 2d0a6fabb..bbe88a453 100644 --- a/Tallies/scoreMemory_class.f90 +++ b/Tallies/scoreMemory_class.f90 @@ -95,10 +95,13 @@ module scoreMemory_class integer(shortInt) :: cycles = 0 !! Cycles counter integer(shortInt) :: batchSize = 1 !! Batch interval size (in cycles) logical(defBool) :: reduced = .false. !! True if bins have been reduced + contains + ! Interface procedures procedure :: init procedure :: kill + procedure :: resetBin generic :: score => score_defReal, score_shortInt, score_longInt generic :: accumulate => accumulate_defReal, accumulate_shortInt, accumulate_longInt generic :: getResult => getResult_withSTD, getResult_withoutSTD @@ -185,6 +188,27 @@ subroutine kill(self) end subroutine kill + !! + !! Reset the data of a given memory slot + !! + subroutine resetBin(self, idx) + class(scoreMemory), intent(inout) :: self + integer(longInt), intent(in) :: idx + character(100),parameter :: Here = 'resetBin (scoreMemory_class.f90)' + + ! Verify bounds for the index + if( idx < 0_longInt .or. 
idx > self % N) then
+      call fatalError(Here,'Index '//numToChar(idx)//' is outside bounds of &
+                            & memory with size '//numToChar(self % N))
+    end if
+
+    ! Reset scores
+    self % bins(idx, BIN)   = ZERO
+    self % bins(idx, CSUM)  = ZERO
+    self % bins(idx, CSUM2) = ZERO
+
+  end subroutine resetBin
+
   !!
   !! Score a result on a given single bin under idx
   !!
diff --git a/Tallies/tallyAdmin_class.f90 b/Tallies/tallyAdmin_class.f90
index c244b228a..68b11e530 100644
--- a/Tallies/tallyAdmin_class.f90
+++ b/Tallies/tallyAdmin_class.f90
@@ -779,8 +779,8 @@ recursive subroutine reportCycleEnd(self, end)
       normScore = self % mem % getScore(self % normBinAddr)
       if (normScore == ZERO) then
         call fatalError(Here, 'Normalisation score from clerk:' // self % normClerkName // 'is 0')
-
       end if
+
       normFactor = self % normValue / normScore
 
     else
From 7b36bcc0d7a5e9f7f928508b177e3e331fcb9d0f Mon Sep 17 00:00:00 2001
From: "V. Raffuzzi" 
Date: Fri, 13 Dec 2024 16:49:43 +0000
Subject: [PATCH 27/27] Addressing reviews and fixing github tests

---
 .github/workflows/build-and-test.yml          |  29 ++-
 .gitignore                                    |   3 +-
 CMakeLists.txt                                |  23 +-
 DataStructures/heapQueue_class.f90            |   9 +-
 .../Tests/baseMgNeutronDatabase_iTest.f90     |   9 +-
 .../mgNeutronData/mgNeutronMaterial_inter.f90 |   6 +-
 .../Tests/particleDungeon_test.f90            |   9 +
 ParticleObjects/particleDungeon_class.f90     | 211 +++---------------
 ParticleObjects/particle_class.f90            |  52 ++++-
 PhysicsPackages/eigenPhysicsPackage_class.f90 |  12 +-
 .../fixedSourcePhysicsPackage_class.f90       |   4 +-
 SharedModules/errors_mod.f90                  |   4 -
 SharedModules/mpi_func.f90                    |  58 ++---
 Tallies/scoreMemory_class.f90                 |   8 +-
 docs/Running.rst                              |   4 +-
 15 files changed, 189 insertions(+), 252 deletions(-)

diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 7bb7a6a9e..124f174ef 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -10,10 +10,11 @@ jobs:
   build-and-test:
     strategy:
      matrix:
-        compiler: [gfortran8, gfortran9, gfortran10]
+        compiler: [gfortran10, gfortran11, gfortran12]
     runs-on: ubuntu-20.04
     container:
       image: mikolajkowalski/scone-test:${{matrix.compiler}}_pfu4
+      options: --env OMPI_ALLOW_RUN_AS_ROOT=1 --env OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
     steps:
     - uses: actions/checkout@v3
     - name: CompileAndTest
       run : |
         mkdir build
         cd build
         cmake ..
         make -j
-        make test
+        ctest --output-on-failure
         cd -
   build-and-test-debug:
     runs-on: ubuntu-20.04
     container:
-      image: mikolajkowalski/scone-test:gfortran10_pfu4
+      image: mikolajkowalski/scone-test:gfortran12_pfu4
+      options: --env OMPI_ALLOW_RUN_AS_ROOT=1 --env OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
     steps:
     - uses: actions/checkout@v3
     - name: CompileAndTest
       run : |
         mkdir build
         cd build
         cmake -DDEBUG=ON ..
         make -j
-        make test
+        ctest --output-on-failure
         cd -
   build-and-test-no-openmp:
     runs-on: ubuntu-20.04
     container:
-      image: mikolajkowalski/scone-test:gfortran10_pfu4
+      image: mikolajkowalski/scone-test:gfortran12_pfu4
+      options: --env OMPI_ALLOW_RUN_AS_ROOT=1 --env OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
     steps:
     - uses: actions/checkout@v3
     - name: CompileAndTest
       run : |
         mkdir build
         cd build
         cmake -DOPENMP=OFF ..
         make -j
-        make test
+        ctest --output-on-failure
         cd -
+  build-and-test-no-mpi:
+    runs-on: ubuntu-20.04
+    container:
+      image: mikolajkowalski/scone-test:gfortran12_pfu4
+    steps:
+    - uses: actions/checkout@v3
+    - name: CompileAndTest
+      run : |
+        mkdir build
+        cd build
+        cmake -DMPI=OFF ..
+        make -j
+        ctest --output-on-failure
+        cd -
diff --git a/.gitignore b/.gitignore
index d51b089f5..32fd9eea9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ cream.egg-info/
 Build
 build
 
-# Ignore all hidden files (except gitignore)
+# Ignore all hidden files (except gitignore and the github folder)
 .*
 !/.gitignore
+!/.github/*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13c1f3d28..7cc3fdabd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -92,7 +92,7 @@ endif()
 
 # Add environmental variable to default search directories
 list(APPEND CMAKE_PREFIX_PATH $ENV{LAPACK_INSTALL})
-find_package(LAPACK REQUIRED )
+find_package(LAPACK REQUIRED)
 message(STATUS ${LAPACK_LIBRARIES})
 
 # Dependencies for BUILD_TESTS
@@ -144,9 +144,10 @@ get_property(SRCS GLOBAL PROPERTY SRCS_LIST)
 
 # Compile library
 add_library(scone STATIC ${SRCS})
-target_compile_options(scone PRIVATE ${scone_extra_flags} )
-target_link_libraries(scone PUBLIC ${LAPACK_LIBRARIES} )
+target_compile_options(scone PRIVATE ${scone_extra_flags})
+target_link_libraries(scone PUBLIC ${LAPACK_LIBRARIES})
 if(MPI)
+  add_compile_definitions(MPI)
   target_link_libraries(scone PUBLIC MPI::MPI_Fortran)
 endif()
 
@@ -156,8 +157,8 @@ endif()
 ####################################################################################################
 # COMPILE SOLVERS
-add_executable(scone.out ./Apps/scone.f90 )
-target_link_libraries(scone.out scone )
+add_executable(scone.out ./Apps/scone.f90)
+target_link_libraries(scone.out scone)
 
 ####################################################################################################
 # COMPILE UNIT TESTS
@@ -172,16 +173,22 @@ if(BUILD_TESTS)
     list(APPEND UNIT_TESTS_RELATIVE ${_testPath})
   endforeach()
 
+  if(MPI)
+    set(MAX_PES_OPTION MAX_PES 1)
+  else()
+    set(MAX_PES_OPTION "")
+  endif()
+
   add_pfunit_ctest(unitTests
     TEST_SOURCES ${UNIT_TESTS_RELATIVE}
     LINK_LIBRARIES scone
-    MAX_PES 1
+    ${MAX_PES_OPTION}
     )
 
   # pFUnit may have a bug which causes a unused variable `class(Test), allocatable :: t` be
   # present if the suite contains only a TestCase and its methods
  # We need to suppress this warning for clarity
-  target_compile_options(unitTests PRIVATE "-Wno-unused-variable" )
+  target_compile_options(unitTests PRIVATE "-Wno-unused-variable")
 
####################################################################################################
 # COMPILE INTEGRATION TESTS
@@ -203,7 +210,7 @@ if(BUILD_TESTS)
   # pFUnit may have a bug which causes a unused variable `class(Test), allocatable :: t` be
   # present if the suite contains only a TestCase and its methods
   # We need to suppress this warning for clarity
-  target_compile_options(integrationTests PRIVATE "-Wno-unused-variable" )
+  target_compile_options(integrationTests PRIVATE "-Wno-unused-variable")
 
 endif()
 
diff --git a/DataStructures/heapQueue_class.f90 b/DataStructures/heapQueue_class.f90
index 0a101cf93..6daf01854 100644
--- a/DataStructures/heapQueue_class.f90
+++ b/DataStructures/heapQueue_class.f90
@@ -96,14 +96,21 @@ subroutine push(self, val)
     self % size = self % size + 1
     self % heap(self % size) = val
 
+    ! If the heap is of size 1 there is no need to order it
+    ! Also, this avoids an out-of-bounds access in debug mode, since parent would
+    ! be 0 and the while condition reads self % heap(parent)
+    if (self % size == 1) return
+
     ! Shift the new value up the heap to restore the heap property
     child = self % size
     parent = child / 2
 
-    do while (child > 1 .and. 
self % heap(parent) < self % heap(child)) + do while (self % heap(parent) < self % heap(child)) call swap(self % heap(parent), self % heap(child)) child = parent parent = child / 2 + ! As above: avoid error in debug mode, caused by trying to access self % heap(0) + if (parent == 0) return end do end subroutine push diff --git a/NuclearData/mgNeutronData/baseMgNeutron/Tests/baseMgNeutronDatabase_iTest.f90 b/NuclearData/mgNeutronData/baseMgNeutron/Tests/baseMgNeutronDatabase_iTest.f90 index 5ce32ddca..daa6784d9 100644 --- a/NuclearData/mgNeutronData/baseMgNeutron/Tests/baseMgNeutronDatabase_iTest.f90 +++ b/NuclearData/mgNeutronData/baseMgNeutron/Tests/baseMgNeutronDatabase_iTest.f90 @@ -7,6 +7,7 @@ module baseMgNeutronDatabase_iTest use dictionary_class, only : dictionary use dictParser_func, only : charToDict use particle_class, only : particle + use RNG_class, only : RNG ! Nuclear Data Objects & Interfaces use baseMgNeutronDatabase_class, only : baseMgNeutronDatabase, baseMgNeutronDatabase_CptrCast, & @@ -59,12 +60,12 @@ subroutine testBaseMgNeutronDatabaseWithP0() type(dictionary) :: matMenuDict type(particle) :: p type(neutronMacroXSs) :: xss + type(RNG), target :: pRNG type(baseMgNeutronMaterial),pointer :: mat class(baseMgNeutronMaterial),pointer :: matClass class(reactionHandle), pointer :: reac real(defReal),parameter :: TOL = 1.0E-6_defReal - data_ptr => database ! Load materialMenu @@ -81,6 +82,8 @@ subroutine testBaseMgNeutronDatabaseWithP0() @assertEqual(4, database % nGroups()) ! Test getting Transport XS + ! Associate pointer to pass tests in debug mode + p % pRNG => pRNG p % G = 1 @assertEqual(2.1_defReal, database % getTrackingXS(p, 1, MATERIAL_XS), TOL) @@ -185,12 +188,12 @@ subroutine testBaseMgNeutronDatabaseWithP1() type(dictionary) :: matMenuDict type(particle) :: p type(neutronMacroXSs) :: xss + type(RNG), target :: pRNG type(baseMgNeutronMaterial),pointer :: mat class(baseMgNeutronMaterial),pointer :: matClass class(reactionHandle), pointer :: reac real(defReal),parameter :: TOL = 1.0E-6_defReal - data_ptr => database ! Load materialMenu @@ -207,6 +210,8 @@ subroutine testBaseMgNeutronDatabaseWithP1() @assertEqual(4, database % nGroups()) ! Test getting Transport XS + ! Associate pointer to pass tests in debug mode + p % pRNG => pRNG p % G = 1 @assertEqual(2.1_defReal, database % getTrackingXS(p, 1, MATERIAL_XS), TOL) diff --git a/NuclearData/mgNeutronData/mgNeutronMaterial_inter.f90 b/NuclearData/mgNeutronData/mgNeutronMaterial_inter.f90 index 4b8aa0cfa..daa90c37f 100644 --- a/NuclearData/mgNeutronData/mgNeutronMaterial_inter.f90 +++ b/NuclearData/mgNeutronData/mgNeutronMaterial_inter.f90 @@ -111,11 +111,15 @@ subroutine getMacroXSs_byP(self, xss, p) class(mgNeutronMaterial), intent(in) :: self type(neutronMacroXSs), intent(out) :: xss class(particle), intent(in) :: p + integer(shortInt) :: matIdx character(100), parameter :: Here = 'getMacroXSs_byP (mgNeutronMateerial_inter.f90)' if (.not. p % isMG) call fatalError(Here, 'CE particle was given to MG data') - associate (matCache => cache_materialCache(p % matIdx())) + ! Store p % matIdx() in a dedicated variable to avoid compilation errors with gfortran >= 13.2 + matIdx = p % matIdx() + + associate (matCache => cache_materialCache(matIdx)) if (matCache % G_tail /= p % G) then ! 
Get cross sections diff --git a/ParticleObjects/Tests/particleDungeon_test.f90 b/ParticleObjects/Tests/particleDungeon_test.f90 index bea00ad86..7628e7746 100644 --- a/ParticleObjects/Tests/particleDungeon_test.f90 +++ b/ParticleObjects/Tests/particleDungeon_test.f90 @@ -4,7 +4,9 @@ module particleDungeon_test use RNG_class, only : RNG use particle_class, only : particle, particleState use particleDungeon_class, only : particleDungeon +#ifdef MPI use mpi_func, only : mpiInitTypes, MPI_COMM_WORLD +#endif use funit implicit none @@ -213,13 +215,18 @@ subroutine testNormPopDown() real(defReal), parameter :: TOL = 1.0E-9 character(100),parameter :: Here = 'testNormPopDown (particleDungeon_test.f90)' +#ifdef MPI call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) if (worldSize > 1) & call fatalError(Here, 'This test cannot be run with multiple MPI processes') ! Initialise MPI types needed for this procedure + ! NOTE: This is necessary because the normalisation uses some mpi procedure + ! with data types manually defined inside mpiInitTypes. During the tests, + ! mpiInit and mpiInitTypes aren't called, so this is done manually here call mpiInitTypes() +#endif ! Initialise call dungeon % init(10) @@ -260,10 +267,12 @@ subroutine testNormPopUp() real(defReal), parameter :: TOL = 1.0E-9 character(100),parameter :: Here = 'testNormPopUp (particleDungeon_test.f90)' +#ifdef MPI call mpi_comm_size(MPI_COMM_WORLD, worldSize, ierr) if (worldSize > 1) & call fatalError(Here, 'This test cannot be run with multiple MPI processes') +#endif ! Initialise call dungeon % init(20) diff --git a/ParticleObjects/particleDungeon_class.f90 b/ParticleObjects/particleDungeon_class.f90 index 364ba1acd..b92d9fffe 100644 --- a/ParticleObjects/particleDungeon_class.f90 +++ b/ParticleObjects/particleDungeon_class.f90 @@ -3,16 +3,17 @@ module particleDungeon_class use numPrecision use errors_mod, only : fatalError use genericProcedures, only : numToChar, swap - use particle_class, only : particle, particleState + use particle_class, only : particle, particleStateData, particleState use RNG_class, only : RNG use heapQueue_class, only : heapQueue use mpi_func, only : isMPIMaster, getMPIWorldSize, getMPIRank, getOffset #ifdef MPI use mpi_func, only : mpi_gather, mpi_allgather, mpi_send, mpi_recv, & - mpi_Bcast, MPI_COMM_WORLD, MASTER_RANK, MPI_DOUBLE, & - MPI_INT, MPI_LONG_LONG, MPI_PARTICLE_STATE, & - MPI_STATUS_IGNORE, particleStateDummy + mpi_bcast, MPI_COMM_WORLD, MPI_STATUS_IGNORE, & + MASTER_RANK, MPI_PARTICLE_STATE, MPI_DEFREAL, & + MPI_SHORTINT, MPI_LONGINT + #endif implicit none @@ -102,7 +103,6 @@ module particleDungeon_class procedure :: setSize procedure :: printToFile procedure :: sortByBroodID - procedure :: samplingWoReplacement ! Private procedures procedure, private :: detain_particle @@ -429,8 +429,8 @@ subroutine normSize(self, totPop, rand) real(defReal) :: threshold, rn integer(longInt) :: seedTemp integer(shortInt) :: maxbroodID, totSites, excess, heapSize, & - n_duplicates, i, j, n_copies, count, nRanks, & - rank + n_duplicates, n_copies, count, nRanks, & + rank, i, j integer(longInt), dimension(:), allocatable :: seeds integer(shortInt), dimension(:), allocatable :: keepers, popSizes #ifdef MPI @@ -453,7 +453,7 @@ subroutine normSize(self, totPop, rand) #ifdef MPI ! 
Get the population sizes of all ranks into the array popSizes in master branch - call mpi_gather(self % pop, 1, MPI_INT, popSizes, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_gather(self % pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD, error) #endif ! In the master process, calculate sampling threshold for the whole population @@ -505,9 +505,9 @@ subroutine normSize(self, totPop, rand) ! Broadcast threshold, excess and random number seeds to all processes #ifdef MPI - call MPI_Bcast(threshold, 1, MPI_DOUBLE, MASTER_RANK, MPI_COMM_WORLD) - call MPI_Bcast(excess, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD) - call MPI_Bcast(seeds, nRanks, MPI_LONG_LONG, MASTER_RANK, MPI_COMM_WORLD) + call mpi_bcast(threshold, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD) + call mpi_bcast(excess, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD) + call mpi_bcast(seeds, nRanks, MPI_LONGINT, MASTER_RANK, MPI_COMM_WORLD) #endif ! Get local process rank and initialise local rng with the correct seed @@ -582,7 +582,7 @@ subroutine normSize(self, totPop, rand) #ifdef MPI ! Get the updated population numbers from all processes - call mpi_allgather(self % pop, 1, MPI_INT, popSizes, 1, MPI_INT, MPI_COMM_WORLD, error) + call mpi_allgather(self % pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, MPI_COMM_WORLD, error) #endif ! Check that normalisation worked @@ -607,16 +607,16 @@ subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) integer(shortInt), dimension(:), intent(in) :: popSizes integer(shortInt), dimension(:), allocatable :: rankOffsets integer(shortInt), dimension(2) :: offset, targetOffset - integer(shortInt) :: mpiOffset, excess, error - type(particleState), dimension(:), allocatable :: stateBuffer - type(particleStateDummy), dimension(:), allocatable :: dummyBuffer + integer(shortInt) :: mpiOffset, excess, error, i + class(particleState), dimension(:), allocatable :: stateBuffer + class(particleStateData), dimension(:), allocatable :: dataBuffer ! Get expected particle population in each process via the offset mpiOffset = getOffset(totPop) ! Communicates the offset from all processes to all processes allocate(rankOffsets(nRanks)) - call mpi_allgather(mpiOffset, 1, MPI_INT, rankOffsets, 1, MPI_INT, MPI_COMM_WORLD, error) + call mpi_allgather(mpiOffset, 1, MPI_SHORTINT, rankOffsets, 1, MPI_SHORTINT, MPI_COMM_WORLD, error) ! Calculate actual and target cumulative number of sites in the processes before offset(1) = sum(popSizes(1 : rank)) @@ -634,25 +634,26 @@ subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) if (excess > 0) then ! Send particles from the end of the dungeon to the rank above - stateBuffer = self % prisoners(self % pop - excess + 1 : self % pop) - call initStateDummies(stateBuffer, dummyBuffer) - call mpi_send(dummyBuffer, excess, MPI_PARTICLE_STATE, rank + 1, rank, MPI_COMM_WORLD, error) + dataBuffer = self % prisoners(self % pop - excess + 1 : self % pop) + call mpi_send(dataBuffer, excess, MPI_PARTICLE_STATE, rank + 1, rank, MPI_COMM_WORLD, error) self % pop = self % pop - excess elseif (excess < 0) then ! 
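! ---------------------------------------------------------------------------
! Illustration only (not from the patch): the master-decides skeleton that
! normSize follows above, assuming an MPI build. The subroutine name is
! invented; the imported entities are the ones particleDungeon_class uses.
subroutine syncThreshold(pop, threshold)
  use numPrecision
  use mpi_func, only : isMPIMaster, getMPIWorldSize, mpi_gather, mpi_bcast, &
                       MPI_SHORTINT, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD
  integer(shortInt), intent(in) :: pop
  real(defReal), intent(inout)  :: threshold
  integer(shortInt), dimension(:), allocatable :: popSizes
  integer(shortInt) :: error

  ! Gather the per-rank populations; only the master receives the full array
  allocate(popSizes(getMPIWorldSize()))
  call mpi_gather(pop, 1, MPI_SHORTINT, popSizes, 1, MPI_SHORTINT, &
                  MASTER_RANK, MPI_COMM_WORLD, error)

  ! The master alone turns popSizes into a sampling threshold
  if (isMPIMaster()) then
    ! ... compute threshold from popSizes ...
  end if

  ! Every rank then receives the master's decision
  call mpi_bcast(threshold, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD)

end subroutine syncThreshold
! ---------------------------------------------------------------------------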
Receive particles from the rank above and store them at the end of the dungeon excess = -excess - allocate(dummyBuffer(excess)) - call mpi_recv(dummyBuffer, excess, MPI_PARTICLE_STATE, rank + 1, rank + 1, & + allocate(dataBuffer(excess), stateBuffer(excess)) + call mpi_recv(dataBuffer, excess, MPI_PARTICLE_STATE, rank + 1, rank + 1, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, error) - call createStatesFromDummies(dummyBuffer, stateBuffer) + do i = 1, abs(excess) + stateBuffer(i) = dataBuffer(i) + end do self % prisoners(self % pop + 1 : self % pop + excess) = stateBuffer self % pop = self % pop + excess end if - if (excess /= 0) deallocate(stateBuffer, dummyBuffer) + if (allocated(dataBuffer)) deallocate(dataBuffer) ! If needed, send/receive particle states from/to the beginning of the dungeon excess = offset(1) - targetOffset(1) @@ -661,9 +662,8 @@ subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) ! Send particles from the beginning of the dungeon to the rank below excess = -excess - stateBuffer = self % prisoners(1 : excess) - call initStateDummies(stateBuffer, dummyBuffer) - call mpi_send(dummyBuffer, excess, MPI_PARTICLE_STATE, rank - 1, rank, MPI_COMM_WORLD, error) + dataBuffer = self % prisoners(1 : excess) + call mpi_send(dataBuffer, excess, MPI_PARTICLE_STATE, rank - 1, rank, MPI_COMM_WORLD, error) ! Move the remaining particles to the beginning of the dungeon self % prisoners(1 : self % pop - excess) = self % prisoners(excess + 1 : self % pop) @@ -672,10 +672,12 @@ subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) elseif (excess > 0) then ! Receive particles from the rank below and store them at the beginning of the dungeon - allocate(dummyBuffer(excess)) - call mpi_recv(dummyBuffer, excess, MPI_PARTICLE_STATE, rank - 1, rank - 1, & + allocate(dataBuffer(excess), stateBuffer(excess)) + call mpi_recv(dataBuffer, excess, MPI_PARTICLE_STATE, rank - 1, rank - 1, & MPI_COMM_WORLD, MPI_STATUS_IGNORE, error) - call createStatesFromDummies(dummyBuffer, stateBuffer) + do i = 1, abs(excess) + stateBuffer(i) = dataBuffer(i) + end do self % prisoners(excess + 1 : self % pop + excess) = self % prisoners(1 : self % pop) self % prisoners(1 : excess) = stateBuffer self % pop = self % pop + excess @@ -685,157 +687,6 @@ subroutine loadBalancing(self, totPop, nRanks, rank, popSizes) end subroutine loadBalancing #endif - !! - !! Helper procedure for MPI loadBalancing - !! - !! Given an input array of particleState, it allocates a vector of particleStateDummy - !! of the same length and copies all the particleState variables. It returns - !! the array of particleStateDummy - !! -#ifdef MPI - subroutine initStateDummies(states, dummies) - type(particleState), dimension(:), intent(in) :: states - type(particleStateDummy), dimension(:), allocatable, intent(out) :: dummies - integer(shortInt) :: i, N - - ! Allocate particleStateDummy array - N = size(states) - allocate(dummies(N)) - - ! Copy particleState attributes - do i = 1, N - dummies(i) % r = states(i) % r - dummies(i) % dir = states(i) % dir - end do - - dummies % wgt = states % wgt - dummies % E = states % E - dummies % G = states % G - dummies % isMG = states % isMG - dummies % type = states % type - dummies % time = states % time - dummies % matIdx = states % matIdx - dummies % uniqueID = states % uniqueID - dummies % cellIdx = states % cellIdx - dummies % collisionN = states % collisionN - dummies % broodID = states % broodID - - end subroutine initStateDummies -#endif - - !! - !! Helper procedure for MPI loadBalancing - !! 
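! ---------------------------------------------------------------------------
! Illustration only (not from the patch): why the dummy-copy helpers removed
! here are no longer needed. With particleState extending particleStateData,
! packing and unpacking the communication buffers reduces to two assignments.
! All names except the two types are invented.
subroutine packUnpackDemo(prisoners, received, buffer, state)
  use particle_class, only : particleState, particleStateData
  type(particleState), dimension(:), intent(in)                    :: prisoners
  class(particleStateData), dimension(:), intent(in)               :: received
  class(particleStateData), dimension(:), allocatable, intent(out) :: buffer
  type(particleState), intent(out)                                 :: state

  ! Pack: intrinsic polymorphic assignment allocates the buffer and copies the
  ! data components, ready for mpi_send with MPI_PARTICLE_STATE
  buffer = prisoners

  ! Unpack: the defined assignment (particleState_fromParticleStateData)
  ! rebuilds a full particleState from the received plain data
  state = received(1)

end subroutine packUnpackDemo
! ---------------------------------------------------------------------------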
- !! Given an input array of particleStateDummy, it allocates a vector of particleState - !! of the same length and copies all the particleStateDummy variables. It returns - !! the array of particleState - !! -#ifdef MPI - subroutine createStatesFromDummies(dummies, states) - type(particleStateDummy), dimension(:), intent(in) :: dummies - type(particleState), dimension(:), allocatable, intent(out) :: states - integer(shortInt) :: i, N - - ! Allocate particleState array - N = size(dummies) - allocate(states(N)) - - ! Copy particleStateDummy attributes - do i = 1, N - states(i) % r = dummies(i) % r - states(i) % dir = dummies(i) % dir - end do - - states % wgt = dummies % wgt - states % E = dummies % E - states % G = dummies % G - states % isMG = dummies % isMG - states % type = dummies % type - states % time = dummies % time - states % matIdx = dummies % matIdx - states % uniqueID = dummies % uniqueID - states % cellIdx = dummies % cellIdx - states % collisionN = dummies % collisionN - states % broodID = dummies % broodID - - end subroutine createStatesFromDummies -#endif - - - !! - !! Normalise total number of particles in the dungeon to match the provided number. - !! Randomly duplicate or remove particles to match the number, performing - !! sampling without replacement (Knuth S algorithm). - !! - !! Does not take weight of a particle into account! - !! - !! NOTE: this procedure is not currently used - !! - subroutine samplingWoReplacement(self, N, rand) - class(particleDungeon), intent(inout) :: self - integer(shortInt), intent(in) :: N - class(RNG), intent(inout) :: rand - integer(shortInt) :: excessP, n_copies, n_duplicates - integer(shortInt) :: i, idx, maxBroodID - integer(shortInt), dimension(:), allocatable :: duplicates - character(100), parameter :: Here = 'samplingWoReplacement (particleDungeon_class.f90)' - - ! Protect against invalid N - if (N > size(self % prisoners)) then - call fatalError(Here,'Requested size: '//numToChar(N) //& - 'is greater then max size: '//numToChar(size(self % prisoners))) - else if (N <= 0) then - call fatalError(Here,'Requested size: '//numToChar(N) //' is not +ve') - end if - - ! Determine the maximum brood ID and sort the dungeon - maxBroodID = maxval(self % prisoners(1:self % pop) % broodID) - call self % sortByBroodID(maxbroodID) - - ! Calculate excess particles to be removed - excessP = self % pop - N - - if (excessP > 0) then ! Reduce population with reservoir sampling - do i = N + 1, self % pop - ! Select new index. Copy data if it is in the safe zone (<= N). - idx = int(i * rand % get()) + 1 - if (idx <= N) then - self % prisoners(idx) = self % prisoners(i) - end if - end do - self % pop = N - - else if (excessP < 0) then ! Clone randomly selected particles - ! For a massive undersampling duplicate (or n-plicate) particles - excessP = -excessP - n_copies = excessP / self % pop - n_duplicates = modulo(excessP, self % pop) - - ! Copy all particles the maximum possible number of times - do i = 1, n_copies - self % prisoners(self % pop * i + 1 : self % pop * (i + 1)) = self % prisoners(1:self % pop) - end do - - ! Choose the remainder particles to duplicate without replacement - duplicates = [(i, i = 1, n_duplicates)] - do i = n_duplicates + 1, self % pop - idx = int(i * rand % get()) + 1 - if (idx <= n_duplicates) then - duplicates(idx) = i - end if - end do - self % pop = self % pop * (n_copies + 1) - - ! 
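! ---------------------------------------------------------------------------
! Illustration only (not from the patch): the one-pass down-sampling step that
! the routine removed here relied on, in isolation. Element i > N overwrites a
! slot drawn uniformly from 1..i, but only when that slot lands in 1..N, which
! leaves every element the same N/pop chance of surviving. Names are invented.
subroutine reservoirKeep(items, pop, N, rand)
  use numPrecision
  use RNG_class, only : RNG
  real(defReal), dimension(:), intent(inout) :: items
  integer(shortInt), intent(in)              :: pop, N
  class(RNG), intent(inout)                  :: rand
  integer(shortInt)                          :: i, idx

  do i = N + 1, pop
    idx = int(i * rand % get()) + 1   ! uniform in 1..i
    if (idx <= N) items(idx) = items(i)
  end do

end subroutine reservoirKeep
! ---------------------------------------------------------------------------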
-      ! Copy the duplicated particles at the end
-      do i = 1, n_duplicates
-        self % prisoners(self % pop + i) = self % prisoners(duplicates(i))
-      end do
-      self % pop = N
-
-    end if
-
-  end subroutine samplingWoReplacement
-
   !!
   !! Reorder the dungeon so the brood ID is in the ascending order
   !!
diff --git a/ParticleObjects/particle_class.f90 b/ParticleObjects/particle_class.f90
index f5b21c484..1338221dc 100644
--- a/ParticleObjects/particle_class.f90
+++ b/ParticleObjects/particle_class.f90
@@ -25,6 +25,9 @@ module particle_class
   !!
   !! Particle compressed for storage
   !!
+  !! particleStateData holds only the data components, so it can be transferred
+  !! directly over MPI; it is extended by particleState, which adds the type-bound
+  !! procedures and is used for tallying.
+  !!
   !! Public Members:
   !!   wgt -> Weight of the particle
   !!   r   -> Global Position of the particle [cm]
@@ -42,12 +45,7 @@ module particle_class
   !!   in the particleDungeon so they can be sorted, which is necessary for reproducibility
   !!   with OpenMP
   !!
-  !! Interface:
-  !!   assignemnt(=)  -> Build particleState from particle
-  !!   operator(.eq.) -> Return True if particle are exactly the same
-  !!   display        -> Print debug information about the state to the console
-  !!
-  type, public :: particleState
+  type, public :: particleStateData
     real(defReal)              :: wgt = ZERO   ! Particle weight
     real(defReal),dimension(3) :: r   = ZERO   ! Global position
     real(defReal),dimension(3) :: dir = ZERO   ! Global direction
@@ -61,15 +59,28 @@ module particle_class
     integer(shortInt) :: uniqueID   = -1   ! Unique id at the lowest coord level
     integer(shortInt) :: collisionN = 0    ! Number of collisions
     integer(shortInt) :: broodID    = 0    ! ID of the source particle
+  end type particleStateData
+
+  !!
+  !! Extension of particleStateData, which adds the type-bound procedures
+  !!
+  !! Interface:
+  !!   assignment(=)  -> Build particleState from particle or particleStateData
+  !!   operator(.eq.) -> Return True if particles are exactly the same
+  !!   display        -> Print debug information about the state to the console
+  !!
+  type, public, extends(particleStateData) :: particleState
   contains
     generic   :: assignment(=)  => fromParticle
+    generic   :: assignment(=)  => fromParticleStateData
     generic   :: operator(.eq.) => equal_particleState
     procedure :: display => display_particleState
-    procedure :: fromParticle => particleState_fromParticle
     procedure :: kill => kill_particleState
+    procedure :: fromParticle => particleState_fromParticle
+    procedure :: fromParticleStateData => particleState_fromParticleStateData

     ! Private procedures
-    procedure,private :: equal_particleState
+    procedure, private :: equal_particleState

   end type particleState

@@ -663,6 +674,31 @@ subroutine particleState_fromParticle(LHS,RHS)

   end subroutine particleState_fromParticle

+  !!
+  !! Copy particleStateData into phase coordinates
+  !!
+  subroutine particleState_fromParticleStateData(LHS,RHS)
+    class(particleState), intent(out)    :: LHS
+    class(particleStateData), intent(in) :: RHS
+
+    LHS % wgt  = RHS % wgt
+    LHS % r    = RHS % r
+    LHS % dir  = RHS % dir
+    LHS % E    = RHS % E
+    LHS % G    = RHS % G
+    LHS % isMG = RHS % isMG
+    LHS % type = RHS % type
+    LHS % time = RHS % time
+
+    ! Save all indexes
+    LHS % matIdx     = RHS % matIdx
+    LHS % uniqueID   = RHS % uniqueID
+    LHS % cellIdx    = RHS % cellIdx
+    LHS % collisionN = RHS % collisionN
+    LHS % broodID    = RHS % broodID
+
+  end subroutine particleState_fromParticleStateData
+
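! ---------------------------------------------------------------------------
! Illustration only (not from the patch): the data/behaviour split introduced
! above, in miniature. The parent type is a plain bag of data, which an MPI
! struct type can describe, while the extension carries the type-bound
! procedures. All names here are invented.
module splitDemo_mod
  use numPrecision
  implicit none

  type :: demoData
    real(defReal) :: wgt = 0.0_defReal
  end type demoData

  type, extends(demoData) :: demoState
  contains
    procedure :: display => demoDisplay
  end type demoState

contains

  subroutine demoDisplay(self)
    class(demoState), intent(in) :: self
    print *, 'wgt = ', self % wgt
  end subroutine demoDisplay

end module splitDemo_mod
! ---------------------------------------------------------------------------

  !!
  !! Define equal operation on phase coordinates
  !!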
Phase coords are equal if all their components are the same diff --git a/PhysicsPackages/eigenPhysicsPackage_class.f90 b/PhysicsPackages/eigenPhysicsPackage_class.f90 index aef203e8a..670e10292 100644 --- a/PhysicsPackages/eigenPhysicsPackage_class.f90 +++ b/PhysicsPackages/eigenPhysicsPackage_class.f90 @@ -8,8 +8,8 @@ module eigenPhysicsPackage_class printSectionEnd, printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, getOffset, getMPIRank #ifdef MPI - use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INT, MPI_COMM_WORLD, & - MPI_DOUBLE, mpi_reduce, MPI_SUM + use mpi_func, only : mpi_reduce, mpi_bcast, MPI_COMM_WORLD, & + MASTER_RANK, MPI_SUM, MPI_DEFREAL, MPI_SHORTINT #endif use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary @@ -290,7 +290,7 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) #ifdef MPI ! Broadcast k_eff obtained in the master to all processes - call MPI_Bcast(k_new, 1, MPI_DOUBLE, MASTER_RANK, MPI_COMM_WORLD) + call mpi_bcast(k_new, 1, MPI_DEFREAL, MASTER_RANK, MPI_COMM_WORLD) #endif ! Load new k-eff estimate into next cycle dungeon @@ -310,9 +310,9 @@ subroutine cycles(self, tally, tallyAtch, N_cycles) #ifdef MPI ! Print the population numbers referred to all processes to screen - call mpi_reduce(nStart, nTemp, 1, MPI_INT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(nStart, nTemp, 1, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) nStart = nTemp - call mpi_reduce(nEnd, nTemp, 1, MPI_INT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) + call mpi_reduce(nEnd, nTemp, 1, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error) nEnd = nTemp #endif @@ -474,7 +474,7 @@ subroutine init(self, dict) ! Broadcast seed to all processes #ifdef MPI - call MPI_Bcast(seed_temp, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD) + call mpi_bcast(seed_temp, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD) #endif seed = seed_temp diff --git a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 index b3905d6d3..6698ba830 100644 --- a/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 +++ b/PhysicsPackages/fixedSourcePhysicsPackage_class.f90 @@ -8,7 +8,7 @@ module fixedSourcePhysicsPackage_class printSeparatorLine use mpi_func, only : isMPIMaster, getWorkshare, getOffset, getMPIRank #ifdef MPI - use mpi_func, only : MASTER_RANK, MPI_Bcast, MPI_INT, MPI_COMM_WORLD + use mpi_func, only : mpi_bcast, MASTER_RANK, MPI_COMM_WORLD, MPI_SHORTINT #endif use hashFunctions_func, only : FNV_1 use dictionary_class, only : dictionary @@ -383,7 +383,7 @@ subroutine init(self, dict) ! Broadcast seed to all processes #ifdef MPI - call MPI_Bcast(seed_temp, 1, MPI_INT, MASTER_RANK, MPI_COMM_WORLD) + call mpi_bcast(seed_temp, 1, MPI_SHORTINT, MASTER_RANK, MPI_COMM_WORLD) #endif seed = seed_temp diff --git a/SharedModules/errors_mod.f90 b/SharedModules/errors_mod.f90 index f924e7ab1..09b81708b 100644 --- a/SharedModules/errors_mod.f90 +++ b/SharedModules/errors_mod.f90 @@ -9,7 +9,6 @@ module errors_mod use numPrecision use universalVariables, only : MAX_COL - !use mpi_func, only : getMPIRank implicit none @@ -37,9 +36,6 @@ subroutine fatalError(where, why) ! Upper frame write(error_unit, *) repeat('<>', MAX_COL / 2) -! #ifdef MPI -! write(error_unit, *) 'Process rank: ', getMPIRank() -! 
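! ---------------------------------------------------------------------------
! Illustration only (not from the patch): the reduce-into-a-temporary pattern
! used for nStart/nEnd in the eigenPhysicsPackage hunk above. A separate
! receive buffer is required because MPI forbids the send and receive
! arguments of mpi_reduce from aliasing (short of MPI_IN_PLACE on the root).
! The subroutine name is invented.
subroutine sumOnMaster(counter)
  use numPrecision
  use mpi_func, only : mpi_reduce, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD
  integer(shortInt), intent(inout) :: counter
  integer(shortInt)                :: temp, error

  call mpi_reduce(counter, temp, 1, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
  counter = temp   ! the sum is meaningful on MASTER_RANK only

end subroutine sumOnMaster
! ---------------------------------------------------------------------------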
#endif
   write(error_unit, *) 'Fatal has occurred in:'
   write(error_unit, *) where, new_line('')
   write(error_unit, *) 'Because:'
diff --git a/SharedModules/mpi_func.f90 b/SharedModules/mpi_func.f90
index da17759c7..33ab7152c 100644
--- a/SharedModules/mpi_func.f90
+++ b/SharedModules/mpi_func.f90
@@ -4,6 +4,7 @@ module mpi_func
   use mpi_f08
 #endif
   use errors_mod,     only : fatalError
+  use particle_class, only : particleStateData

   implicit none

@@ -11,30 +12,13 @@ module mpi_func
   integer(shortInt), private   :: rank = 0
   integer(shortInt), parameter :: MASTER_RANK = 0

-  !! Public type that replicates exactly particleState
-  !!
-  !! It is necessary for load balancing in the dungeon: particles have to be
-  !!   transferred betwen processes, and MPI doesn't allow to transfer types with
-  !!   type-bound procedures
-  type, public :: particleStateDummy
-    real(defReal)              :: wgt
-    real(defReal),dimension(3) :: r
-    real(defReal),dimension(3) :: dir
-    real(defReal)              :: E
-    integer(shortInt)          :: G
-    logical(defBool)           :: isMG
-    integer(shortInt)          :: type
-    real(defReal)              :: time
-    integer(shortInt)          :: matIdx
-    integer(shortInt)          :: cellIdx
-    integer(shortInt)          :: uniqueID
-    integer(shortInt)          :: collisionN
-    integer(shortInt)          :: broodID
-  end type particleStateDummy
-
-  !! Common MPI types
+  !! MPI types
 #ifdef MPI
-  type(MPI_Datatype) :: MPI_PARTICLE_STATE
+  type(MPI_Datatype) :: MPI_DEFREAL
+  type(MPI_Datatype) :: MPI_SHORTINT
+  type(MPI_Datatype) :: MPI_LONGINT
+  type(MPI_Datatype) :: MPI_DEFBOOL
+  type(MPI_Datatype) :: MPI_PARTICLE_STATE
 #endif

 contains

@@ -46,7 +30,7 @@ module mpi_func
   !!
   subroutine mpiInit()
 #ifdef MPI
-    integer(shortInt) :: ierr
+    integer(shortInt)  :: ierr

     call mpi_init(ierr)

@@ -79,12 +63,28 @@ end subroutine mpiFinalise
   !!
   subroutine mpiInitTypes()
 #ifdef MPI
-    integer(shortInt) :: ierr, stateSize
-    type(particleStateDummy) :: state
+    integer(shortInt)       :: ierr, stateSize
+    type(particleStateData) :: state
     integer(kind = MPI_ADDRESS_KIND), dimension(:), allocatable :: displacements
     integer(shortInt), dimension(:), allocatable :: blockLengths
     type(MPI_Datatype), dimension(:), allocatable :: types

+    ! Define MPI type for DEFREAL
+    call mpi_type_create_f90_real(precision(1.0_defReal), range(1.0_defReal), MPI_DEFREAL, ierr)
+    call mpi_type_commit(MPI_DEFREAL, ierr)
+
+    ! Define MPI type for SHORTINT
+    call mpi_type_create_f90_integer(range(1_shortInt), MPI_SHORTINT, ierr)
+    call mpi_type_commit(MPI_SHORTINT, ierr)
+
+    ! Define MPI type for LONGINT
+    call mpi_type_create_f90_integer(range(1_longInt), MPI_LONGINT, ierr)
+    call mpi_type_commit(MPI_LONGINT, ierr)
+
+    ! Define MPI type for DEFBOOL
+    call MPI_type_contiguous(defBool, MPI_BYTE, MPI_DEFBOOL, ierr)
+    call MPI_type_commit(MPI_DEFBOOL, ierr)
+
-    ! Define MPI type for particleState
-    ! Note that particleState has stateSize = 13 attributes; if an attribute is
-    ! added to particleState, it had to be added here too
+    ! Define MPI type for particleStateData
+    ! Note that particleStateData has stateSize = 13 attributes; if an attribute
+    ! is added to particleStateData, it has to be added here too
@@ -93,8 +93,9 @@ subroutine mpiInitTypes()

-    ! Create arrays with dimension and type of each property of particleStateDummy
+    ! Create arrays with dimension and type of each property of particleStateData
     blockLengths = (/1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1/)
-    types = (/MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_LOGICAL, MPI_INT, &
-              MPI_DOUBLE, MPI_INT, MPI_INT, MPI_INT, MPI_INT, MPI_INT/)
+    types = (/MPI_DEFREAL, MPI_DEFREAL, MPI_DEFREAL, MPI_DEFREAL, MPI_SHORTINT, &
+              MPI_DEFBOOL, MPI_SHORTINT, MPI_DEFREAL, MPI_SHORTINT, MPI_SHORTINT, &
+              MPI_SHORTINT, MPI_SHORTINT, MPI_SHORTINT/)

     ! Create array of memory byte displacements
     call mpi_get_address(state % wgt, displacements(1), ierr)

@@ -115,6 +116,7 @@ subroutine mpiInitTypes()
     ! Define new type
     call mpi_type_create_struct(stateSize, blockLengths, displacements, types, MPI_PARTICLE_STATE, ierr)
     call mpi_type_commit(MPI_PARTICLE_STATE, ierr)
+
 #endif

   end subroutine mpiInitTypes
diff --git a/Tallies/scoreMemory_class.f90 b/Tallies/scoreMemory_class.f90
index bbe88a453..bf67940c5 100644
--- a/Tallies/scoreMemory_class.f90
+++ b/Tallies/scoreMemory_class.f90
@@ -2,8 +2,8 @@ module scoreMemory_class
   use numPrecision
 #ifdef MPI
-  use mpi_func, only : mpi_reduce, MPI_SUM, MPI_DOUBLE, MPI_COMM_WORLD, &
-                       MASTER_RANK, isMPIMaster, MPI_INT
+  use mpi_func, only : mpi_reduce, isMPIMaster, MPI_SUM, &
+                       MPI_COMM_WORLD, MASTER_RANK, MPI_DEFREAL, MPI_SHORTINT
 #endif
   use universalVariables, only : array_pad
   use genericProcedures,  only : fatalError, numToChar
@@ -449,7 +449,7 @@ subroutine collectDistributed(self)
     ! Reduce the batch count
     ! Note we need to use size 1 arrays to fit the interface of mpi_reduce, which expects
     ! to be given arrays
-    call mpi_reduce(self % batchN, buffer, 1, MPI_INT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
+    call mpi_reduce(self % batchN, buffer, 1, MPI_SHORTINT, MPI_SUM, MASTER_RANK, MPI_COMM_WORLD, error)
     if (isMPIMaster()) then
       self % batchN = buffer
     else
@@ -510,7 +510,7 @@ subroutine reduceArray(data, buffer)
       call mpi_reduce(data(start : start + chunk - 1),   &
                       buffer(start : start + chunk - 1), &
                       int(chunk, shortInt),              &
-                      MPI_DOUBLE,                        &
+                      MPI_DEFREAL,                       &
                       MPI_SUM,                           &
                       MASTER_RANK,                       &
                       MPI_COMM_WORLD,                    &
diff --git a/docs/Running.rst b/docs/Running.rst
index 8913299dd..6aeb21655 100644
--- a/docs/Running.rst
+++ b/docs/Running.rst
@@ -18,7 +18,7 @@ and to the input file::

 .. admonition:: Options

-   OpenMPI
+   OpenMP
      Specifies the number ``<n>`` of OpenMP threads to be used::

        --omp <n>

@@ -34,4 +34,6 @@ and to the input file::

    <path>/mpirun -np <N> <path>/scone.out <input>

+   OpenMP and MPI can be run together too::

+     <path>/mpirun -np <N> <path>/scone.out --omp <n> <input>
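   For example, to run two MPI processes with four OpenMP threads each (the
   numbers are only illustrative)::

     mpirun -np 2 ./scone.out --omp 4 ./InputFile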