Merge pull request #211 from BerkeleyLab/parallel

Merge parallel into main
BerkeleyLab · Oct 1, 2024 · af9bc3a
2 parents f9fec23 + ade67db
commit af9bc3a
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 15 deletions.
diff --git a/example/train-and-write.F90 b/example/train-and-write.F90
@@ -78,9 +78,8 @@ program train_and_write
 #else
         associate(network_outputs => trainable_engine%infer(inputs))
 #endif
-          print *," Outputs                          |&
-                   Desired outputs                    |&
-                   Errors"
+          print "(a,62x,a,53x,a)", " Output", "| Desired outputs", "| Errors"
+
           do p = 1, num_pairs
             print *,network_outputs(p)%values(),"|", inputs(p)%values(), "|",  network_outputs(p)%values() - inputs(p)%values()
           end do
@@ -106,15 +105,24 @@ subroutine output(inference_engine, file_name)
     call json_file%write_lines(file_name)
   end subroutine
 
+  pure function e(m,n) result(e_mn)
+    integer, intent(in) :: m, n
+    real e_mn
+    e_mn = real(merge(1,0,m==n))
+  end function
+
   function perturbed_identity_network(perturbation_magnitude) result(trainable_engine)
     type(trainable_engine_t) trainable_engine
     real, intent(in) :: perturbation_magnitude
-    integer, parameter :: nodes_per_layer(*) = [2, 2, 2, 2]
+    integer, parameter :: nodes_per_layer(*) = [4, 4, 4, 4]
     integer, parameter :: max_n = maxval(nodes_per_layer), layers = size(nodes_per_layer)
-    integer i
-    real, parameter :: identity(*,*,*) = &
-      reshape(real([( [1,0], [0,1], i=1,layers-1 )]), [max_n, max_n, layers-1])
-    real w_harvest(size(identity,1), size(identity,2), size(identity,3)), b_harvest(size(identity,1), size(identity,3))
+    integer i, j, l
+    real, allocatable :: identity(:,:,:), w_harvest(:,:,:), b_harvest(:,:)
+
+    identity =reshape([( [( [(e(i,j),j=1,max_n)], i=1,max_n )], l=1,layers-1 )], [max_n, max_n, layers-1])
+
+    allocate(w_harvest(max_n, max_n, layers-1))
+    allocate(b_harvest(max_n,layers-1))
 
     call random_number(w_harvest)
     call random_number(b_harvest)
@@ -128,6 +136,7 @@ function perturbed_identity_network(perturbation_magnitude) result(trainable_eng
       )
 
     end associate
+
   end function
 
 end program
diff --git a/src/inference_engine/trainable_engine_s.F90 b/src/inference_engine/trainable_engine_s.F90
@@ -1,12 +1,17 @@
 ! Copyright (c), The Regents of the University of California
 ! Terms of use are as specified in LICENSE.txt
 
+#ifndef F2023_LOCALITY
 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 202400)
-# define F2023_REDUCE_LOCALITY
+# define F2023_LOCALITY 1
+#endif
 #endif
 
-! TODO: #define F2018_LOCALITY_SPECIFIERS for the Cray, LLVM flang, and
-!       older Intel ifx compilers and add the corresponding code.
+#ifndef F2018_LOCALITY
+#if defined(_CRAYFTN)
+# define F2018_LOCALITY 1
+#endif
+#endif
 
 submodule(trainable_engine_m) trainable_engine_s
   use assert_m, only : assert
@@ -139,7 +144,7 @@
 
     associate(output_layer => ubound(self%n,1))
 
-#ifdef F2023_REDUCE_LOCALITY
+#if F2023_LOCALITY || F2018_LOCALITY
       if (.not. allocated(self%z)) allocate(self%z,  mold=self%b) ! z-values: Sum z_j^l = w_jk^{l} a_k^{l-1} + b_j^l
       if (.not. allocated(self%delta)) allocate(self%delta, mold=self%b)
       if (.not. allocated(self%a)) allocate(self%a(maxval(self%n), input_layer:output_layer)) ! Activations
@@ -179,9 +184,22 @@
               real(rkind), allocatable :: pair_cost(:)
               if (present(cost)) allocate(pair_cost(mini_batch_size))
 
-#ifdef F2023_REDUCE_LOCALITY
+#if F2023_LOCALITY
               iterate_through_batch: &
               do concurrent (pair = 1:mini_batch_size) local(a,z,delta) reduce(+: dcdb, dcdw)
+
+#elif F2018_LOCALITY
+
+              reduce_gradients: &
+              block
+                real(rkind) reduce_dcdb(size(dcdb,1),size(dcdb,2),mini_batch_size)
+                real(rkind) reduce_dcdw(size(dcdw,1),size(dcdw,2),size(dcdw,3),mini_batch_size)
+                reduce_dcdb = 0._rkind
+                reduce_dcdw = 0._rkind
+
+              iterate_through_batch: &
+              do concurrent (pair = 1:mini_batch_size) local(a,z,delta)
+
 #else
 
               reduce_gradients: &
@@ -229,7 +247,7 @@
                   integer j
                   sum_gradients: &
                   do l = 1,output_layer
-#ifdef F2023_REDUCE_LOCALITY
+#if F2023_LOCALITY
                     dcdb(1:n(l),l) = dcdb(1:n(l),l) + delta(1:n(l),l)
                     do concurrent(j = 1:n(l)) reduce(+: dcdw)
                       dcdw(j,1:n(l-1),l) = dcdw(j,1:n(l-1),l) + a(1:n(l-1),l-1)*delta(j,l)
@@ -243,8 +261,15 @@
                   end do sum_gradients
                 end block
 
-#ifdef F2023_REDUCE_LOCALITY
+#if F2023_LOCALITY
+                end do iterate_through_batch
+#elif F2018_LOCALITY
+
                 end do iterate_through_batch
+                dcdb = sum(reduce_dcdb,dim=3)
+                dcdw = sum(reduce_dcdw,dim=4)
+
+                end block reduce_gradients
 #else
                 end block iteration
                 end do iterate_through_batch