From 86d4071f10120f6f503745ff9598e033501439bd Mon Sep 17 00:00:00 2001
From: "michael.lueken" <Michael.Lueken@noaa.gov>
Date: Fri, 28 May 2021 15:31:10 +0000
Subject: [PATCH] GitHub Issue #118. Implement EFSOI process in FV3 GFS
 workflow - EFSOI-specific util source code.

---
 util/EFSOI_Utilities/src/CMakeLists.txt       |  21 +
 util/EFSOI_Utilities/src/efsoi.f90            | 329 +++++++
 util/EFSOI_Utilities/src/efsoi_main.f90       | 166 ++++
 util/EFSOI_Utilities/src/gridio_efsoi.f90     | 884 ++++++++++++++++++
 util/EFSOI_Utilities/src/loadbal_efsoi.f90    | 380 ++++++++
 util/EFSOI_Utilities/src/loc_advection.f90    | 100 ++
 .../src/scatter_chunks_efsoi.f90              | 153 +++
 util/EFSOI_Utilities/src/statevec_efsoi.f90   | 319 +++++++
 8 files changed, 2352 insertions(+)
 create mode 100644 util/EFSOI_Utilities/src/CMakeLists.txt
 create mode 100644 util/EFSOI_Utilities/src/efsoi.f90
 create mode 100644 util/EFSOI_Utilities/src/efsoi_main.f90
 create mode 100644 util/EFSOI_Utilities/src/gridio_efsoi.f90
 create mode 100644 util/EFSOI_Utilities/src/loadbal_efsoi.f90
 create mode 100644 util/EFSOI_Utilities/src/loc_advection.f90
 create mode 100644 util/EFSOI_Utilities/src/scatter_chunks_efsoi.f90
 create mode 100644 util/EFSOI_Utilities/src/statevec_efsoi.f90

diff --git a/util/EFSOI_Utilities/src/CMakeLists.txt b/util/EFSOI_Utilities/src/CMakeLists.txt
new file mode 100644
index 0000000000..49d2a8fdee
--- /dev/null
+++ b/util/EFSOI_Utilities/src/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 2.6)
+if(BUILD_EFSOI)
+
+  file(GLOB LOCAL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*90)
+
+  set_source_files_properties( ${LOCAL_SRC} PROPERTIES COMPILE_FLAGS ${ENKF_Fortran_FLAGS} )
+  
+  add_executable(global_efsoi.x  ${LOCAL_SRC} )
+  
+  SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${OMPFLAG}" )
+  
+  include_directories(${CMAKE_CURRENT_BINARY_DIR} "${PROJECT_BINARY_DIR}/include/global"  ${CMAKE_CURRENT_BINARY_DIR}/.. ${MPI_Fortran_INCLUDE_DIRS} ${MPI_Fortran_INCLUDE_PATH} ${CORE_INCS} ${NETCDF_INCLUDE_DIRS} ${NCDIAG_INCS} ${FV3GFS_NCIO_INCS}) 
+
+  target_link_libraries( global_efsoi.x enkflib enkfdeplib ${GSILIB} ${GSISHAREDLIB} ${CORE_LIBRARIES}
+  ${MPI_Fortran_LIBRARIES} ${LAPACK_LIBRARIES} ${NETCDF_Fortran_LIBRARIES} ${NETCDF_C_LIBRARIES}
+  ${FV3GFS_NCIO_LIBRARIES} 
+  ${EXTRA_LINKER_FLAGS} ${GSI_LDFLAGS} ${CORE_BUILT} ${CORE_LIBRARIES} ${CORE_BUILT} ${NCDIAG_LIBRARIES})  
+endif()
+
+
+
diff --git a/util/EFSOI_Utilities/src/efsoi.f90 b/util/EFSOI_Utilities/src/efsoi.f90
new file mode 100644
index 0000000000..54527c559c
--- /dev/null
+++ b/util/EFSOI_Utilities/src/efsoi.f90
@@ -0,0 +1,329 @@
+module efsoi
+!$$$  module documentation block
+!
+! module: efsoi                        Update observation impact estimates
+!                                      with the EnSRF framework.
+!
+! prgmmr: ota              org: np23                   date: 2011-12-01
+! prgmmr: groff            org: emc                    date: 2018-05-24
+!
+! abstract:
+!  Computes the observation impact estimates using the EnKF analysis products.
+!  Formulation is based on Kalnay et al (2012, Tellus A, submitted).
+!  Parallel processing is following the way of the serial EnSRF.
+!
+! Public Subroutines:
+!  efsoi_update: performs the Forecast Sensitivity to Observations update within
+!              the EnSRF framework. The EnKF other than the serial EnSRF 
+!              (e.g. the LETKF) can be also used here (the method is 
+!              independent on the type of EnKF).
+!
+! Public Variables: None
+!
+! Modules Used: kinds, constants, params, covlocal, mpisetup, loadbal, statevec,
+!               kdtree2_module, obsmod, gridinfo, obs_sensitivity
+!
+! program history log:
+!   2011-12-01  Created from the LETKF core module.
+!   2012-04-04  Changed to EnSRF framework for saving memory
+!   2018-05-24  Renamed module added non-bias corrected EFSOI
+!
+! attributes:
+!   language: f95
+!
+!$$$
+
+use mpisetup
+use covlocal, only:  taper
+use kinds, only: r_double, i_kind, r_kind
+use kdtree2_module, only: kdtree2_r_nearest, kdtree2_result
+use loadbal_efsoi, only: numptsperproc, indxproc, lnp_chunk, kdtree_grid, &
+                   iprocob, indxob_chunk, anal_obchunk_prior, numobsperproc, &
+                   indxproc_obs, nobs_max
+use scatter_chunks_efsoi, only: fcerror_chunk, anal_chunk
+use enkf_obsmod, only: oberrvar, ob, ensmean_ob, obloc, obloclon, obloclat, oblnp, &
+                       obtime, nobstot, corrlengthsq, lnsigl, obtimel, anal_ob
+use constants, only: constants_initialized, pi, zero, one
+use params, only: nanals, corrlengthnh, corrlengthtr, corrlengthsh, &
+                  tar_minlon, tar_maxlon, tar_minlat, tar_maxlat, &
+                  tar_minlev, tar_maxlev
+use gridinfo, only: nlevs_pres, lonsgrd, latsgrd
+use statevec_efsoi, only: id_u, id_v, id_t, id_q, id_ps
+use enkf_obs_sensitivity, only: obsense_kin, obsense_dry, obsense_moist, adloc_chunk
+
+implicit none
+
+private
+public :: efsoi_update
+
+contains
+
+subroutine efsoi_update()
+implicit none
+real(r_kind),allocatable,dimension(:) :: djdy_kin, djdy_dry, djdy_moist
+real(r_kind),allocatable,dimension(:) :: uvwork, tpwork, qwork
+real(r_kind),allocatable,dimension(:) :: taper_disgrd
+real(r_kind),dimension(nobstot) :: obdep, oberinv
+real(r_single),dimension(nobstot) :: invobtimel, invlnsigl
+real(r_single),dimension(nobstot) :: invcorlen
+real(r_kind),allocatable,dimension(:,:) :: buffertmp
+type(kdtree2_result),allocatable,dimension(:) :: sresults1
+real(r_kind) :: anal_obtmp(nanals)
+real(r_kind) :: r,taper1,taper3,taperv
+real(r_single) :: lnsig
+real(r_double) :: t1, t2, t3, t4, tbegin, tend
+integer(i_kind) :: np, nob, nob1, nob2, nobm, npob, nf2, i, ii, npt, nanal, nn
+integer(i_kind) :: nnpt, nsame, nskip, ngrd1
+integer(i_kind) :: ierr
+logical :: kdgrid
+
+
+if (.not. constants_initialized) then
+    print *,'constants not initialized (with init_constants, init_constants_derived)'
+    call stop2(28)
+end if
+
+allocate(sresults1(nlevs_pres*numptsperproc(nproc+1)))
+allocate(taper_disgrd(nlevs_pres*numptsperproc(nproc+1)))
+
+! Compute the inverse of cut-off length and 
+! the observation departure from first guess
+!$omp parallel do private(nob)
+do nob=1,nobstot
+   invcorlen(nob) = one/corrlengthsq(nob)
+   invlnsigl(nob) = one/lnsigl(nob)
+   invobtimel(nob)= one/obtimel(nob)
+   oberinv(nob)   = one/oberrvar(nob)
+   obdep(nob)     = ob(nob)-ensmean_ob(nob)
+end do
+
+
+kdgrid=associated(kdtree_grid)
+allocate(djdy_kin(nobstot))
+allocate(djdy_dry(nobstot))
+allocate(djdy_moist(nobstot))
+djdy_kin(1:nobstot) = zero
+djdy_dry(1:nobstot) = zero
+djdy_moist(1:nobstot) = zero
+allocate(uvwork(nanals))
+allocate(tpwork(nanals))
+allocate(qwork(nanals))
+
+nsame = 0
+nskip = 0
+nobm = 1
+t2 = zero
+t3 = zero
+t4 = zero
+tbegin = mpi_wtime()
+
+! 
+! Loop for each observations
+obsloop: do nob=1,nobstot
+
+
+   if (nproc == 0 .and. modulo(nob , 10000) == 0 ) print *,'doing nob=',nob
+
+   t1 = mpi_wtime()
+
+   if(oberrvar(nob) > 1.e10_r_kind .or. abs(obtime(nob)) >= obtimel(nob))then
+      nskip = nskip + 1
+      cycle obsloop
+   end if
+
+   ! what processor is this ob on?
+   npob = iprocob(nob)
+
+   ! get anal_ob from that processor and send to other processors.
+   if (nproc == npob) then
+      nob1 = indxob_chunk(nob)
+      anal_obtmp(1:nanals) = anal_obchunk_prior(:,nob1)
+   end if
+ 
+   call mpi_bcast(anal_obtmp,nanals,mpi_realkind,npob,mpi_comm_world,ierr)
+   
+   anal_obtmp(1:nanals) = anal_obtmp(1:nanals) * oberinv(nob)
+
+   t2 = t2 + mpi_wtime() - t1
+   t1 = mpi_wtime()
+
+
+
+
+
+   ! ============================================================  
+   ! Only need to recalculate nearest points when lat/lon is different
+   ! ============================================================  
+   if(nob == 1 .or. &
+         abs(obloclat(nob)-obloclat(nobm)) .gt. tiny(obloclat(nob)) .or. &
+         abs(obloclon(nob)-obloclon(nobm)) .gt. tiny(obloclon(nob)) .or. &
+         abs(corrlengthsq(nob)-corrlengthsq(nobm)) .gt. tiny(corrlengthsq(nob))) then
+		 
+      nobm=nob
+      ! determine localization length scales based on latitude of ob.
+      
+      nf2=0
+      ! search analysis grid points for those within corrlength of 
+      ! ob being assimilated (using a kd-tree for speed).
+      if (kdgrid) then
+         call kdtree2_r_nearest(tp=kdtree_grid,qv=obloc(:,nob), &
+              r2=corrlengthsq(nob), &
+              nfound=nf2,nalloc=nlevs_pres*numptsperproc(nproc+1), &
+              results=sresults1)
+      else
+         ! use brute force search if number of grid points on this proc <= 3
+         do nn=1,nlevs_pres
+            do npt=1,numptsperproc(nproc+1)
+               nnpt = nlevs_pres * (npt-1) + nn
+               r = sum( (obloc(:,nob)-adloc_chunk(:,nnpt))**2, 1 )
+               if (r < corrlengthsq(nob)) then
+                  nf2 = nf2 + 1
+                  sresults1(nf2)%idx = nnpt
+                  sresults1(nf2)%dis = r
+               end if
+            end do
+         end do
+      end if
+	  
+	  !$omp parallel do private(nob1)
+      do nob1=1,nf2
+         taper_disgrd(nob1) = taper(sqrt(sresults1(nob1)%dis*invcorlen(nob)))
+      end do
+	  
+   else
+      nsame=nsame+1
+   end if
+
+   t3 = t3 + mpi_wtime() - t1
+   t1 = mpi_wtime()
+
+   ! ============================================================  
+   ! forecast error sensitivity to the observations
+   ! ============================================================  
+   if (nf2 > 0) then
+	   
+      taper3=taper(obtime(nob)*invobtimel(nob))
+	  
+      uvwork(1:nanals) = zero
+      tpwork(1:nanals) = zero
+      qwork(1:nanals)  = zero
+	  
+      ! rho * X_f^T (e_t|0 + e_t|-6) / 2
+      ! contributions from (U,V), (T,Ps) and Q are computed separately
+      
+      do ii=1,nf2
+         taper1=taper_disgrd(ii)*taper3
+		 
+         i = (sresults1(ii)%idx-1)/nlevs_pres+1
+		 
+         ngrd1=indxproc(nproc+1,i)
+		 
+         if(tar_minlon <= tar_maxlon .and. &
+              & (lonsgrd(ngrd1) < tar_minlon .or. lonsgrd(ngrd1) > tar_maxlon .or. &
+              & latsgrd(ngrd1) < tar_minlat .or. latsgrd(ngrd1) > tar_maxlat)) &
+              & cycle
+			  
+         if(tar_minlon > tar_maxlon .and. &
+              & ((lonsgrd(ngrd1) < tar_minlon .and. lonsgrd(ngrd1) > tar_maxlon) .or. &
+              & latsgrd(ngrd1) < tar_minlat .or. latsgrd(ngrd1) > tar_maxlat)) &
+              & cycle
+			  
+         nn = sresults1(ii)%idx - (i-1)*nlevs_pres
+		 
+         if((tar_minlev /= 1 .or. nn /= nlevs_pres) &
+              & .and. (nn > tar_maxlev .or. nn < tar_minlev)) cycle
+         lnsig = abs(lnp_chunk(i,nn)-oblnp(nob))
+		 
+         if(lnsig < lnsigl(nob))then
+            taperv=taper1*taper(lnsig*invlnsigl(nob))
+         else
+            cycle
+         end if
+		 
+         if(nn == nlevs_pres) then
+            do nanal=1,nanals
+               tpwork(nanal) = tpwork(nanal) + taperv &
+                    & * anal_chunk(nanal,i,id_ps) * fcerror_chunk(i,id_ps)
+            end do
+            cycle
+         end if
+
+
+         ! 		 
+         do nanal=1,nanals
+            uvwork(nanal) = uvwork(nanal) + taperv &
+                 & * (anal_chunk(nanal,i,id_u(nn)) * fcerror_chunk(i,id_u(nn)) &
+                 & + anal_chunk(nanal,i,id_v(nn)) * fcerror_chunk(i,id_v(nn)))
+				 
+            tpwork(nanal) = tpwork(nanal) + taperv &
+                 & * anal_chunk(nanal,i,id_t(nn)) * fcerror_chunk(i,id_t(nn))
+				 
+            if(id_q(nn) > 0) qwork(nanal) = qwork(nanal) + taperv &
+                 & * anal_chunk(nanal,i,id_q(nn)) * fcerror_chunk(i,id_q(nn))
+         end do
+		 
+      end do
+	  
+      ! R^-1 HX_a [rho * X_f^T (e_t|0 + e_t|-6) / 2]
+      do nanal=1,nanals
+         djdy_kin(nob) 		= djdy_kin(nob)   + anal_obtmp(nanal) * uvwork(nanal)
+         djdy_dry(nob) 		= djdy_dry(nob)   + anal_obtmp(nanal) * tpwork(nanal)
+         djdy_moist(nob) 	= djdy_moist(nob) + anal_obtmp(nanal) * qwork(nanal)
+      end do
+	  
+   end if ! end of, if (nf2 > 0) then
+  
+   
+   t4 = t4 + mpi_wtime() - t1
+   t1 = mpi_wtime()
+
+end do obsloop
+
+tend = mpi_wtime()
+if (nproc == 0 .or. nproc == numproc-1) print *,'time to process FSO on gridpoint = ',tend-tbegin,t2,t3,t4,' secs'
+
+if (nproc == 0 .and. nskip > 0) print *,nskip,' out of',nobstot,'obs skipped'
+if (nproc == 0 .and. nsame > 0) print *,nsame,' out of', nobstot-nskip,' same lat/lon'
+
+! Observation sensitivity post process
+!$omp parallel do private(nob)
+do nob=1,nobstot
+   djdy_dry(nob) 		= djdy_dry(nob)   + djdy_kin(nob)
+   djdy_moist(nob) 		= djdy_moist(nob) + djdy_dry(nob)
+   obsense_kin(nob) 	= djdy_kin(nob)   * obdep(nob)
+   obsense_dry(nob) 	= djdy_dry(nob)   * obdep(nob)
+   obsense_moist(nob) 	= djdy_moist(nob) * obdep(nob)
+end do
+
+! Gathering analysis perturbations projected on the observation space
+if(nproc /= 0) then
+  call mpi_send(anal_obchunk_prior,numobsperproc(nproc+1)*nanals,mpi_real4,0, &
+       1,mpi_comm_world,ierr)
+else
+   allocate(anal_ob(1:nanals,nobstot))
+   allocate(buffertmp(nanals,nobs_max))
+   do np=1,numproc-1
+      call mpi_recv(buffertmp,numobsperproc(np+1)*nanals,mpi_real4,np, &
+           1,mpi_comm_world,mpi_status,ierr)
+      do nob1=1,numobsperproc(np+1)
+         nob2 = indxproc_obs(np+1,nob1)
+         anal_ob(:,nob2) = buffertmp(:,nob1)
+      end do
+   end do
+   do nob1=1,numobsperproc(1)
+      nob2 = indxproc_obs(1,nob1)
+      anal_ob(:,nob2) = anal_obchunk_prior(:,nob1)
+   end do
+   deallocate(buffertmp)
+end if
+
+t1 = mpi_wtime() - tend
+if (nproc == 0) print *,'time to observation sensitivity post process = ',t1,' secs'
+
+deallocate(anal_obchunk_prior)
+deallocate(sresults1)
+deallocate(djdy_kin,djdy_dry,djdy_moist,taper_disgrd,uvwork,tpwork,qwork)
+
+return
+end subroutine efsoi_update
+end module efsoi
diff --git a/util/EFSOI_Utilities/src/efsoi_main.f90 b/util/EFSOI_Utilities/src/efsoi_main.f90
new file mode 100644
index 0000000000..e4e6307441
--- /dev/null
+++ b/util/EFSOI_Utilities/src/efsoi_main.f90
@@ -0,0 +1,166 @@
+program efsoi_main
+!$$$  main program documentation block
+!
+! program:  efsoi_main                  high level driver program for 
+!                                       efsoi calculations.
+!
+! prgmmr: Ota         org: EMC/JMA               date: 2012
+! prgmmr: Groff       org: EMC                   date: 2018
+!
+! abstract: This is the main program for EFSOI code. It does the following:
+!           a) initialize MPI, read EFSOI namelist from efsoi.nml on each task.
+!           b) reads observation sensitivity files.
+!           c) read horizontal grid information (lat/lon of each grid point) and
+!              pressure at each grid point/vertical level.
+!           d) decomposition of horizontal grid points and observation
+!              priors to minimize load imbalance for EFSOI calcs.
+!           e) read forecast and analysis states necessary for
+!              EFSOI calculations.
+!           f) Initialize/allocate for EFSOI moist/dry/kinetic
+!              total output
+!           g) Estimate the location of observation response
+!              (i.e. advect the localization).  Default estimation
+!              approach is to multiply meridional and zonal wind
+!              by 0.75.
+!           h) Perform EFSOI calculations for all observations
+!              considered for assimilation during the EnSRF update.
+!           i) write EFSOI calculations and ancillary EFSOI
+!              information to file
+!           j) deallocate all allocatable arrays, finalize MPI.
+!
+! program history log:
+!   2018-04-30  Initial development. Adaptation of enkf_main program
+!               towards EFSOI calculation process
+!
+! usage:
+!   input files: Update to FV3 nomenclature
+!     sigfAT_YYYYMMDDHH_mem* - Ensemble of forecasts valid at advance time
+!                              (AT) hours beyond the initial time
+!     sigfAT_YYYYMMDDHH_ensmean - Ensemble mean forecast AT hours from
+!                                 initial time
+!     sigfAT+6_YYYYMMDDHH-6_ensmean - Ensemble mean forecast AT+6 hours
+!                                     from initial time minus 6 hours
+!     siganl.YYYYMMDDHH.gdas - 
+!   output files: 
+!     obimpact_YYYYMMDDHH.dat - observation impact file
+!                         
+! comments: This program is a wrapper for components needed to peform
+!           EFSOI calculations
+!
+! attributes:
+!   language: f95
+!
+!$$$
+
+ use kinds, only: r_double,i_kind
+ ! reads namelist parameters.
+ ! applying enkf namelist apparatus
+ use params, only : read_namelist,nanals
+ ! mpi functions and variables.
+ use mpisetup, only:  mpi_initialize, mpi_initialize_io, mpi_cleanup, nproc, &
+                      mpi_wtime, mpi_comm_world
+ ! model state vector 
+ use statevec_efsoi, only: read_state_efsoi, statevec_cleanup_efsoi, init_statevec_efsoi
+ ! load balancing
+ use loadbal_efsoi, only: load_balance_efsoi, loadbal_cleanup_efsoi
+ ! efsoi update
+ use efsoi, only: efsoi_update
+ ! Observation sensitivity usage
+ use enkf_obs_sensitivity, only: init_ob_sens, print_ob_sens, destroy_ob_sens, read_ob_sens
+ use loc_advection, only: loc_advection_efsoi
+ ! Scatter chunks for EFSOI
+ use scatter_chunks_efsoi, only: scatter_chunks_ob_impact
+ use omp_lib, only: omp_get_max_threads
+ ! Needed for rad fixed files 
+ use radinfo, only: init_rad, init_rad_vars 
+ 
+ implicit none
+ integer :: ierr, nth
+ real(r_double) t1,t2
+
+ ! 0. initialize MPI.
+ call mpi_initialize()
+ if (nproc==0) call w3tagb('EFSOI_CALC',2021,0319,0055,'NP25')
+
+ call init_rad()
+
+ ! 1. read namelist.
+ call read_namelist()
+ 
+ ! 2. initialize MPI communicator for IO tasks.
+ call mpi_initialize_io(nanals)
+
+ ! 3. Initialize rad vars
+ call init_rad_vars()
+
+ ! 4. read the necessary inputs for the EFSOI calculation from file
+ t1 = mpi_wtime()
+ call read_ob_sens()
+ t2 = mpi_wtime()
+ if (nproc == 0) print *, 'time in read_ob_sens = ',t2-t1,'on proc', nproc
+
+ nth= omp_get_max_threads()
+ if(nproc== 0)write(6,*) 'efsoi_main:  number of threads ',nth
+
+ ! 5. Halt processors until all are completed
+ call mpi_barrier(mpi_comm_world, ierr)
+
+ ! 6. Initialize state vector information
+ call init_statevec_efsoi()
+
+ ! 7. read in ensemble forecast members,
+ ! valid at the evaluation forecast
+ ! time, distribute pieces to each task.
+ t1 = mpi_wtime()
+ call read_state_efsoi()
+ t2 = mpi_wtime()
+ if (nproc == 0) print *,'time in read_state_efsoi =',t2-t1,'on proc',nproc
+
+ ! 8. do load balancing (partitioning of grid points
+ ! and observations among processors)
+ t1 = mpi_wtime()
+ call load_balance_efsoi()
+ t2 = mpi_wtime()
+ if (nproc == 0) print *,'time in load_balance_efsoi =',t2-t1,'on proc',nproc
+
+ ! 9. apply scattering of efsoi chunks
+ t1 = mpi_wtime()
+ call scatter_chunks_ob_impact()  ! ensemble scattering
+ t2 = mpi_wtime()
+ if (nproc == 0) print *,'time to scatter observation impact chunks =',t2-t1,'on proc',nproc
+
+ ! 10. Initialize EFSOI variables
+ t1 = mpi_wtime()
+ call init_ob_sens()
+ t2 = mpi_wtime() 
+ if (nproc == 0) print *,'time to allocate ob sensitivity variables =',t2-t1,'on proc',nproc
+
+ ! 11. Calculate the estimated location of observation
+ ! response at evaluation time
+ t1 = mpi_wtime()
+ call loc_advection_efsoi()
+ t2 = mpi_wtime()
+ if (nproc == 0) print *,'time in loc_advection_efsoi =',t2-t1,'on proc',nproc
+
+ ! 12. Perform the EFSOI calcs
+ t1 = mpi_wtime()
+ call efsoi_update()
+ t2 = mpi_wtime()
+ if (nproc == 0) print *,'time in efsoi_update =',t2-t1,'on proc',nproc
+
+ ! 13. print EFSOI sensitivity i/o on root task.
+ t1 = mpi_wtime()
+ call print_ob_sens()
+ t2 = mpi_wtime()
+ if (nproc == 0) print *,'time needed to write observation impact file =',t2-t1,'on proc',nproc
+
+ ! 14. Cleanup for EFSOI configuration
+ call statevec_cleanup_efsoi()
+ call loadbal_cleanup_efsoi()
+ call destroy_ob_sens()
+
+ ! 15. finalize MPI.
+ if (nproc==0) call w3tage('EFSOI_CALC')
+ call mpi_cleanup()
+
+end program efsoi_main
diff --git a/util/EFSOI_Utilities/src/gridio_efsoi.f90 b/util/EFSOI_Utilities/src/gridio_efsoi.f90
new file mode 100644
index 0000000000..4687522c67
--- /dev/null
+++ b/util/EFSOI_Utilities/src/gridio_efsoi.f90
@@ -0,0 +1,884 @@
+ module gridio_efsoi
+!$$$  module documentation block
+!
+! module: gridio_efsoi               subroutines for reading and writing
+!                                    ensemble members files using
+!                                    EnKF internal format.  A separate
+!                                    program must be run before and
+!                                    after the EnKF analysis to convert
+!                                    to and from the native model format.
+!
+! prgmmr: whitaker         org: esrl/psd               date: 2009-02-23
+! prgmmr: groff            org: emc                    date: 2018-05-24
+! prgmmr: eichmann, lin    org: emc                    date: 2021-02-04
+!
+! abstract: I/O for ensemble member files.
+! 
+! Public Functions:
+!  readgriddata_efsoi
+!
+! this version reads nemsio files for EFSOI applications.
+!
+! Public Variables: None
+!
+! Modules Used: constants (must be pre-initialized).
+!
+! program history log:
+!   2009-02-23  Initial version.
+!   2015-06-29  Add ability to read/write multiple time levels
+!   2016-05-02  shlyaeva: Modification for reading state vector from table
+!   2016-04-20  Modify to handle the updated nemsio sig file (P, DP, DPDT
+!               removed)
+!   2016-11-29  shlyaeva: Add reading/calculating tsen, qi, ql. Pass filenames and
+!               hours to read routine to read separately state and control files. 
+!               Pass levels and dimenstions to read/write routines (dealing with
+!               prse: nlevs + 1 levels). Pass "reducedgrid" parameter.
+!   2017-06-14  Adding functionality to optionally write non-inflated ensembles,  
+!               a required input for EFSO calculations
+!   2018-05-24  Pruning available EnKF nemsio read functionality for EFSOI
+!               application.  Add additional routines to compute EFSOI relevant
+!               quantities from files read.
+!   2019-03-13  Add precipitation components
+!   2019-07-10  Add convective clouds
+!   2021-02-04  Added functionality for FV3 GFS, netcdf file handling
+!
+! attributes:
+!   language: f95
+!
+!$$$
+ use constants, only: zero,one,cp,fv,rd,tiny_r_kind,max_varname_length,t0c,r0_05,constants_initialized, &
+                      tref,pref,hvap				  
+					  
+ use params, only: nlons,nlats,nlevs,use_gfs_nemsio,pseudo_rh, &
+                   cliptracers,datapath,imp_physics,use_gfs_ncio,cnvw_option, &
+                   nanals, &
+                   wmoist,andataname, &
+                   nvars,forecast_impact,read_member_forecasts, &
+                   read_ensmean_forecast, read_analysis_mean, &
+                   read_member_forecasts, read_verification, &
+                   read_member_analyses
+ use kinds, only: i_kind,r_double,r_kind,r_single
+ use gridinfo, only: ntrunc,npts ! getgridinfo be called first!
+
+ use specmod, only: sptezv_s, sptez_s, init_spec_vars, ndimspec => nc, &
+                    isinitialized, &
+					gaulats
+
+ use reducedgrid_mod, only: regtoreduced, reducedtoreg, &
+                            lonsperlat, nlonsfull
+
+ use mpisetup, only: nproc, numproc
+ 	
+ use mpimod, only: mpi_comm_world, mpi_sum, mpi_real4, mpi_real8, mpi_rtype 						
+ 
+ use mpeu_util, only: getindex
+ use nemsio_module
+ use loadbal_efsoi, only: numptsperproc, indxproc, npts_max
+
+ implicit none
+ private
+ public :: readgriddata_efsoi
+ public :: get_weight, destroy_weight, divide_weight
+ real(r_kind), allocatable, dimension(:,:), save :: weight
+ real(r_kind), allocatable, dimension(:), save :: grweight
+ integer(i_kind),public :: ncdim
+ contains
+
+ ! =====================================================================
+ ! =====================================================================
+ ! =====================================================================
+ subroutine get_weight()
+
+  use sigio_module, only: sigio_head, sigio_data, sigio_sclose, sigio_sropen, &
+                          sigio_srohdc, sigio_sclose, sigio_aldata, sigio_axdata
+  use nemsio_module, only: nemsio_gfile,nemsio_open,nemsio_close,&
+                           nemsio_getfilehead,nemsio_getheadvar,nemsio_realkind,nemsio_charkind,&
+                           nemsio_readrecv,nemsio_init,nemsio_setheadvar,nemsio_writerecv
+						   
+  use module_fv3gfs_ncio, only: Dataset, Variable, Dimension, open_dataset,&
+                                quantize_data, read_attribute, close_dataset, get_dim, read_vardata
+								
+  implicit none 
+ 
+  character(len=500) :: filename 
+ 
+  real(r_kind), dimension(npts,nlevs+1) :: pressi 
+  real(r_single), dimension(npts) :: tmpgrd 
+  type(nemsio_gfile) :: gfile
+  real(nemsio_realkind), dimension(nlons*nlats) :: nems_wrk
+  real(r_kind), dimension(nlons*nlats) :: psfc
+  real(r_single),allocatable,dimension(:,:,:)   :: nems_vcoord 
+  real(r_kind), allocatable, dimension(:) :: ak,bk 
+  real(r_kind) :: sumcoslat 
+ 
+  integer(i_kind) :: nlevsin,nlonsin,nlatsin,idvc 
+  integer(i_kind) :: i,j,k,iret!iunitsig,iret 
+ 
+  ! for netcdf handling 
+  type(Dataset) :: dset 
+  type(Dimension) :: londim,latdim,levdim 
+  integer(i_kind) :: iunitsig
+  type(sigio_head)   :: sighead
+  type(sigio_data)   :: sigdata
+  real(r_kind), allocatable, dimension(:)       :: psg
+  real(r_kind), dimension(ndimspec)             :: vrtspec
+  real(r_single), allocatable, dimension(:,:)   :: values_2d  
+  integer(i_kind) :: ierr  
+  
+  
+
+  iunitsig = 77
+
+  ! ============================================================
+  ! Read analysis data 
+  ! ==================================
+  ! update ncio
+  !if (nproc .eq. 0) then
+
+  filename = trim(adjustl(datapath))//trim(adjustl(andataname))
+  if (nproc == 0) print *,'reading analysis file: ',filename
+  ! --- nemsio data -------------------------------------------------		
+  if (use_gfs_nemsio) then
+     call nemsio_init(iret=iret)
+     if(iret/=0) then
+        write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_init,iret=',iret
+        call stop2(23)
+     end if
+  
+     call nemsio_open(gfile,filename,'READ',iret=iret)
+  
+     if (iret/=0) then
+        write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_open,iret=',iret
+        call stop2(23)
+     endif
+  
+     call nemsio_getfilehead(gfile,iret=iret, dimx=nlonsin, dimy=nlatsin,&
+                             dimz=nlevsin,idvc=idvc)
+     if (nlons /= nlonsin .or. nlats /= nlatsin .or. nlevs /= nlevsin) then
+        print *,'incorrect dims in nemsio file'
+        print *,'expected',nlons,nlats,nlevs
+        print *,'got',nlonsin,nlatsin,nlevsin
+        call stop2(23)
+     end if
+	 
+	
+  ! --- NetCDF data -------------------------------------------------	
+  else if (use_gfs_ncio) then
+
+	  
+     dset = open_dataset(filename)
+	 
+     londim = get_dim(dset,'grid_xt'); nlonsin = londim%len
+     latdim = get_dim(dset,'grid_yt'); nlatsin = latdim%len
+     levdim = get_dim(dset,'pfull');   nlevsin = levdim%len
+     idvc=2
+	 
+ 
+  ! --- Other data -------------------------------------------------	
+  else
+     call sigio_srohdc(iunitsig,trim(filename), &
+                       sighead,sigdata,iret)
+     if (iret /= 0) then
+        print *,'error reading file in gridio ',trim(filename)
+        call stop2(23)
+     end if
+  endif
+  
+  !endif
+  ! ============================================================ 
+	 
+  allocate(ak(nlevs+1))
+  allocate(bk(nlevs+1))
+  allocate(psg(nlons*nlats))  
+  allocate(weight(npts,nlevs)) 
+  allocate(grweight(npts)) 
+  
+      
+  if (.not. constants_initialized) then
+     print *,'constants not initialized (with init_constants, init_constants_derived)'
+     call stop2(23)
+  end if
+
+  ! calculate weight on the grid point
+  sumcoslat = zero
+  
+  ! if(reducedgrid) then
+     k=0
+     do i=1,nlats 
+        do j=1,lonsperlat(i) 
+           k=k+1 
+           grweight(k) = cos(gaulats(i)) * real(nlonsfull,r_kind) & 
+                / real(lonsperlat(i),r_kind) 
+           sumcoslat = sumcoslat + grweight(k) 
+        end do 
+     end do 
+	 
+  ! else 
+  !    do i=1,nlons*nlats 
+  !       grweight(i) = cos(latsgrd(i)) 
+  !       sumcoslat = sumcoslat + grweight(i) 
+  !    end do 
+  ! end if 
+  
+  sumcoslat = 1.0_r_kind / sumcoslat 
+  grweight(:) = sqrt(grweight(:)*sumcoslat)
+ 
+  ! ====================================================
+  ! Extract surface pressure in pa
+  ! to aid in quantifying mass
+  ! ========================================
+  ! === Option ONE, nemsio === 
+  if (use_gfs_nemsio) then  
+	  
+
+     call nemsio_readrecv(gfile,'pres','sfc',1,nems_wrk,iret=iret)
+     if (iret/=0) then
+        write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_readrecv(ps), iret=',iret
+        call stop2(23)
+     endif
+  
+     ! Assign surface pressure in pa
+     psfc = nems_wrk  
+
+     ! Extract surface pressure
+     ! on reduced gaussian grid
+     call regtoreduced(psfc,tmpgrd)
+
+     ! calculate half level pressures 
+     if (allocated(nems_vcoord))     deallocate(nems_vcoord)
+	 
+     allocate(nems_vcoord(nlevs+1,3,2))
+     call nemsio_getfilehead(gfile,iret=iret,vcoord=nems_vcoord)
+  
+     if ( iret /= 0 ) then
+        write(6,*)' gridio:  ***ERROR*** problem reading header ', &
+                  'vcoord, Status = ',iret
+        call stop2(99)
+     endif    
+  
+     if ( idvc == 0 ) then ! sigma coordinate, old file format.
+        ak = zero
+        bk = nems_vcoord(1:nlevs+1,1,1)
+     else if ( idvc == 1 ) then ! sigma coordinate
+        ak = zero
+        bk = nems_vcoord(1:nlevs+1,2,1)
+		
+     else if ( idvc == 2 .or. idvc == 3 ) then ! hybrid coordinate
+   
+        ! AFE      ak = nems_vcoord(1:nlevs+1,1,1)
+        ! AFE      ak = nems_vcoord(1:nlevs+1,2,1)
+        ak = 0.01_r_kind*nems_vcoord(1:nlevs+1,1,1) ! convert to mb
+        bk = nems_vcoord(1:nlevs+1,2,1)
+     else
+        write(6,*)'gridio:  ***ERROR*** INVALID value for idvc=',idvc
+        call stop2(85)
+     end if
+  
+     !==> pressure at interfaces. 
+     do k=1,nlevs+1 
+        pressi(:,k)=ak(k)+bk(k)*tmpgrd 
+     end do 
+     deallocate(ak,bk) 
+  
+  ! === Option TWO, NetCDF === 
+  else if (use_gfs_ncio) then
+	  
+	 call mpi_barrier(mpi_comm_world,ierr)
+
+     call read_vardata(dset, 'pressfc', values_2d,errcode=iret)
+	 
+     if (iret /= 0) then
+        print *,'error reading ps'
+        call stop2(31)
+     endif
+	 
+     psg = 0.01_r_kind*reshape(values_2d,(/nlons*nlats/))
+	 
+     ! Extract surface pressure
+     ! on reduced gaussian grid
+     call regtoreduced(psg,tmpgrd)	 
+	 
+     call read_attribute(dset, 'ak', ak)
+     call read_attribute(dset, 'bk', bk)
+  
+     ! pressure at interfaces
+     do k=1,nlevs+1
+        pressi(:,k) = 0.01_r_kind*ak(nlevs-k+2)+bk(nlevs-k+2)*tmpgrd
+		
+        if (nproc == 0) print *,'netcdf, min/max pressi',k,minval(pressi(:,k)),maxval(pressi(:,k))
+     enddo
+	 
+     deallocate(ak,bk,values_2d)  
+	 
+ ! === Option THREE, other === 	 
+ else
+	 
+    vrtspec = sigdata%ps
+    call sptez_s(vrtspec,psg,1)
+    !==> input psg is ln(ps) in centibars - convert to ps in millibars.
+    psg = 10._r_kind*exp(psg)
+    allocate(ak(nlevs+1),bk(nlevs+1))
+    if (sighead%idvc .eq. 0) then ! sigma coordinate, old file format.
+        ak = zero
+        bk = sighead%si(1:nlevs+1)
+    else if (sighead%idvc == 1) then ! sigma coordinate
+        ak = zero
+        bk = sighead%vcoord(1:nlevs+1,2)
+    else if (sighead%idvc == 2 .or. sighead%idvc == 3) then ! hybrid coordinate
+        bk = sighead%vcoord(1:nlevs+1,2)
+        ak = 0.01_r_kind*sighead%vcoord(1:nlevs+1,1)  ! convert to mb
+    else
+        print *,'unknown vertical coordinate type',sighead%idvc
+        call stop2(32)
+    end if
+
+    do k=1,nlevs+1
+       pressi(:,k)=ak(k)+bk(k)*psg
+       print *,'sigio, min/max pressi',k,minval(pressi(:,k)),maxval(pressi(:,k))
+    enddo
+	
+    deallocate(ak,bk)
+	
+ endif
+ 
+ if (use_gfs_nemsio) call nemsio_close(gfile,iret=iret)
+ if (use_gfs_ncio)   call close_dataset(dset)   
+  
+  !$omp parallel do private(k) shared(weight,pressi,grweight,nlevs) 
+  do k=1,nlevs 
+     ! sqrt(dp)*sqrt(area) 
+     weight(:,k)=sqrt( (pressi(:,k)-pressi(:,k+1))/tmpgrd(:))*grweight(:) 
+  end do
+
+  !$omp end parallel do
+  
+  
+  
+
+	 
+
+   
+  return 
+  
+ end subroutine get_weight
+ 
+
+
+
+
+
+
+
+ subroutine destroy_weight 
+  implicit none 
+  if(allocated(weight)) deallocate(weight) 
+  if(allocated(grweight)) deallocate(grweight) 
+ end subroutine destroy_weight 
+ 
+ 
+ 
+ 
+ 
+ subroutine divide_weight(grdin) 
+  implicit none 
+  real(r_single), dimension(npts_max,ncdim), intent(inout) :: grdin 
+  real(r_single) cptr,qweight,rdtrpr 
+  integer(i_kind) :: k,npt 
+  cptr = real(sqrt(tref/cp),r_kind) 
+  qweight = real(sqrt(cp*tref/wmoist)/hvap,r_kind) 
+  rdtrpr = real(sqrt(pref/(rd*tref)),r_kind) 
+  do npt=1,numptsperproc(nproc+1) 
+     do k=1,nlevs 
+        grdin(npt,k) = grdin(npt,k) / weight(indxproc(nproc+1,npt),k) 
+        grdin(npt,nlevs+k) = grdin(npt,nlevs+k) / weight(indxproc(nproc+1,npt),k) 
+        grdin(npt,2*nlevs+k) = grdin(npt,2*nlevs+k) * cptr / weight(indxproc(nproc+1,npt),k) 
+        if (nvars .gt. 3) then 
+           grdin(npt,3*nlevs+k) = grdin(npt,3*nlevs+k) * qweight / weight(indxproc(nproc+1,npt),k) 
+        end if 
+     end do 
+! AFE the indexing schema needs to be cleaned up
+!     grdin(npt,nvars*nlevs+1) = grdin(npt,nvars*nlevs+1) & 
+     grdin(npt,ncdim) = grdin(npt,ncdim) & 
+          & * rdtrpr / grweight(indxproc(nproc+1,npt)) 
+  end do 
+  return 
+ end subroutine divide_weight 
+
+
+ subroutine readgriddata_efsoi(vars3d,vars2d,n3d,n2d,levels,ndim,grdin,mode,nanal,ft,hr,infilename)
+	 
+  use sigio_module, only: sigio_head, sigio_data, sigio_sclose, sigio_sropen, &
+                          sigio_srohdc, sigio_sclose, sigio_aldata, sigio_axdata
+						  
+  use nemsio_module, only: nemsio_gfile,nemsio_open,nemsio_close,&
+                           nemsio_getfilehead,nemsio_getheadvar,nemsio_realkind,nemsio_charkind,&
+                           nemsio_readrecv,nemsio_init,nemsio_setheadvar,nemsio_writerecv
+						   
+  use module_fv3gfs_ncio, only: Dataset, Variable, Dimension, open_dataset,&
+                                read_attribute, close_dataset, get_dim, read_vardata
+					
+  implicit none
+
+  integer, intent(in), optional :: nanal
+  integer, intent(in), optional :: ft
+  integer, intent(in), optional :: hr
+  
+  character(len=100), intent(in), optional :: infilename 
+  integer, intent(in) :: mode
+  character(len=max_varname_length), dimension(n2d), intent(in) :: vars2d
+  character(len=max_varname_length), dimension(n3d), intent(in) :: vars3d
+  character(len=7) charnanal
+  integer, intent(in) :: n2d, n3d
+  integer, dimension(0:n3d), intent(in) :: levels
+  integer, intent(in) :: ndim
+  real(r_single), dimension(npts,ndim), intent(out) :: grdin
+  real(r_kind) cptr,qweight,rdtrpr
+  character(len=500) :: filename
+  character(len=3) charft
+  character(len=2) charhr
+
+  real(r_kind), dimension(nlons*nlats)          :: ug,vg
+  real(r_single), dimension(npts,nlevs)         :: tv, q, tv_to_t
+  real(r_kind), allocatable, dimension(:)       :: psg
+  
+  real(r_single),allocatable,dimension(:,:,:)   :: nems_vcoord
+  real(nemsio_realkind), dimension(nlons*nlats) :: nems_wrk,nems_wrk2
+  type(nemsio_gfile) :: gfile
+
+  integer(i_kind) :: u_ind, v_ind, tv_ind, q_ind
+  integer(i_kind) :: ps_ind
+
+  integer(i_kind) :: k,iret,idvc,nlonsin,nlatsin,nlevsin
+  
+  character(len=10) :: fileformat
+  integer(i_kind)   :: iunitsig
+  type(sigio_head)  :: sighead
+  type(sigio_data)  :: sigdata
+  type(Dataset)     :: dset
+  type(Dimension)   :: londim,latdim,levdim	  
+  real(r_kind), dimension(ndimspec) :: vrtspec,divspec	  
+  real(r_single), allocatable, dimension(:,:,:) :: ug3d,vg3d
+  real(r_single), allocatable, dimension(:,:) :: values_2d
+  integer(i_kind) :: nb,ne
+
+
+ 
+  iunitsig = 77
+  nb = 1
+  ne = 1
+
+  ! ----------------------------!
+  ! EFSOI Filename constructions!
+  ! ----------------------------!
+
+  ! Construct the Filename based on
+  ! input arguments
+  if(.not. present(infilename)) then
+     write(charft, '(i3.3)') ft
+     write(charhr, '(i2.2)') hr
+     if (nanal > 0) then
+        write(charnanal,'(a3, i3.3)') 'mem', nanal
+     else
+        charnanal = 'ensmean'
+     endif
+  endif 
+
+  ! ======================================
+  if (use_gfs_nemsio) then
+	  fileformat = '.nemsio'
+  else if (use_gfs_ncio) then
+	  fileformat = '.nc'
+  else
+	  print *,'Warning in gridio_efsoi.f90 '
+  end if
+  ! ======================================
+
+
+  ! === EFSOI filename construction =============
+  if(forecast_impact) then
+ 
+     if(mode == read_ensmean_forecast) then
+         filename = trim(adjustl(datapath))//"gdas.t"//charhr//"z.atmf"//charft// &
+               "."//trim(adjustl(charnanal))//trim(fileformat)
+     else if(mode == read_analysis_mean) then
+         filename = trim(adjustl(datapath))//"gdas.t"//charhr//"z.atmanl."// &
+               trim(adjustl(charnanal))//trim(fileformat)
+     else if(mode == read_member_forecasts) then
+        filename = trim(adjustl(datapath))//trim(adjustl(charnanal))//"/"// &
+              "gdas.t"//charhr//"z.atmf"//charft//trim(fileformat)
+     else if(mode == read_verification) then
+        filename = trim(adjustl(datapath))//infilename
+     else
+        print *,'This mode is not supported: mode=',mode
+        call stop2(23)
+     end if
+  else
+     ! Analysis Impact
+     if(mode == read_ensmean_forecast) then
+        filename = trim(adjustl(datapath))//"gdas.t"//charhr//"z.atmf"//charft// &
+              "."//trim(adjustl(charnanal))//trim(fileformat)
+     else if(mode == read_member_analyses) then
+        filename = trim(adjustl(datapath))//trim(adjustl(charnanal))//"/"//"gdas.t"// &
+              charhr//"z.atmanl"//trim(fileformat)
+     else if(mode == read_verification) then
+        filename = trim(adjustl(datapath))//infilename
+     else
+        print *,'This mode is not supported: mode=',mode
+        call stop2(23)
+     end if
+  endif         
+
+  ! --------------------------------------------!
+  ! Read in state vector from file and transform!
+  ! to EFSOI relevant quantities                !
+  ! --------------------------------------------!
+
+
+  if (nproc == 0) print *,'reading state vector file: ',filename
+
+  if (use_gfs_nemsio) then 
+	  
+     call nemsio_init(iret=iret)
+     if(iret/=0) then
+        write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_init, iret=',iret
+        call stop2(23)
+     end if
+	 
+     call nemsio_open(gfile,filename,'READ',iret=iret)
+  
+     if (iret/=0) then
+        write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_open, iret=',iret
+        call stop2(23)
+     endif
+     call nemsio_getfilehead(gfile,iret=iret, dimx=nlonsin, dimy=nlatsin,&
+                             dimz=nlevsin,idvc=idvc)
+     if (nlons /= nlonsin .or. nlats /= nlatsin .or. nlevs /= nlevsin) then
+        print *,'incorrect dims in nemsio file'
+        print *,'expected',nlons,nlats,nlevs
+        print *,'got',nlonsin,nlatsin,nlevsin
+        call stop2(23)
+     end if
+
+  else if (use_gfs_ncio) then 
+
+     dset = open_dataset(filename)
+	 
+     londim = get_dim(dset,'grid_xt'); nlonsin = londim%len
+     latdim = get_dim(dset,'grid_yt'); nlatsin = latdim%len
+     levdim = get_dim(dset,'pfull');   nlevsin = levdim%len
+     idvc=2
+	 	 	 
+  else
+	  
+     call sigio_srohdc(iunitsig,trim(filename), &
+                       sighead,sigdata,iret)
+     if (iret /= 0) then
+        print *,'error reading file in gridio ',trim(filename)
+        call stop2(23)
+     end if
+  endif
+	 
+	 
+  cptr = sqrt(cp/tref)
+  qweight = sqrt(wmoist/(cp*tref))*hvap
+  rdtrpr = sqrt(rd*tref)/pref
+
+  
+  u_ind   = getindex(vars3d, 'u')   !< indices in the state var arrays
+  v_ind   = getindex(vars3d, 'v')   ! U and V (3D)
+  tv_ind  = getindex(vars3d, 'tv')  ! Tv (3D)
+  q_ind   = getindex(vars3d, 'q')   ! Q (3D)
+  ps_ind  = getindex(vars2d, 'ps')  ! Ps (2D)
+
+
+ 
+  if (.not. isinitialized) call init_spec_vars(nlons,nlats,ntrunc,4)
+
+  allocate(psg(nlons*nlats))
+ 
+  
+  ! ======================================================
+  ! Get surface pressure
+  ! ====================
+  if (use_gfs_nemsio) then
+	  
+     call nemsio_readrecv(gfile,'pres','sfc',1,nems_wrk,iret=iret)
+     if (iret/=0) then
+        write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_readrecv(ps), iret=',iret
+        call stop2(23)
+     endif
+     psg = 0.01_r_kind*nems_wrk ! convert ps to millibars.
+
+     ! 
+     if (allocated(nems_vcoord))     deallocate(nems_vcoord)
+     allocate(nems_vcoord(nlevs+1,3,2))
+     call nemsio_getfilehead(gfile,iret=iret,vcoord=nems_vcoord)
+     if ( iret /= 0 ) then
+        write(6,*)' gridio:  ***ERROR*** problem reading header ', &
+           'vcoord, Status = ',iret
+        call stop2(99)
+     endif
+	 
+  else if (use_gfs_ncio) then
+	  
+     call read_vardata(dset, 'pressfc', values_2d,errcode=iret)
+     if (iret /= 0) then
+        print *,'error reading ps'
+        call stop2(31)
+     endif
+     psg = 0.01_r_kind*reshape(values_2d,(/nlons*nlats/)) 
+	 
+  else
+     vrtspec = sigdata%ps
+     call sptez_s(vrtspec,psg,1)
+     !==> input psg is ln(ps) in centibars - convert to ps in millibars.
+     psg = 10._r_kind*exp(psg)
+  endif 
+ 
+    
+  
+  ! ============================================================================= 
+  !  get U,V,temp,q,ps on gaussian grid.
+  ! =====================================
+  ! u is first nlevs, v is second, t is third, then tracers.
+  if (use_gfs_nemsio) then
+	  
+     do k=1,nlevs
+		
+		! Get u-wind
+        call nemsio_readrecv(gfile,'ugrd','mid layer',k,nems_wrk,iret=iret)
+		
+        if (iret/=0) then
+           write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_readrecv(ugrd), iret=',iret
+           call stop2(23)
+        endif
+		
+        ug = nems_wrk
+		
+		! Get v-wind
+        call nemsio_readrecv(gfile,'vgrd','mid layer',k,nems_wrk,iret=iret)
+        if (iret/=0) then
+            write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_readrecv(vgrd), iret=',iret
+            call stop2(23)
+        endif
+    
+        vg = nems_wrk
+    
+        if (u_ind > 0) call copytogrdin(ug,grdin(:,levels(u_ind-1) + k))
+        if (v_ind > 0) call copytogrdin(vg,grdin(:,levels(v_ind-1) + k))
+        
+		
+        ! Transformation to EFSOI relevant quantities
+        ! Assign weighted kinetic energy components. There
+        ! are no unit/metric differences for the kinetic component
+        grdin(:,levels(u_ind-1) + k) = weight(:,k) * grdin(:,levels(u_ind-1) + k)
+        grdin(:,levels(v_ind-1) + k) = weight(:,k) * grdin(:,levels(v_ind-1) + k)
+     
+        call nemsio_readrecv(gfile,'tmp','mid layer',k,nems_wrk,iret=iret)
+        if (iret/=0) then
+           write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_readrecv(tmp), iret=',iret
+           call stop2(23)
+        endif
+		
+        call nemsio_readrecv(gfile,'spfh','mid layer',k,nems_wrk2,iret=iret)
+        if (iret/=0) then
+           write(6,*)'gridio/readgriddata_efsoi: GFS: problem with nemsio_readrecv(spfh), iret=',iret
+           call stop2(23)
+        endif
+    
+        ug = nems_wrk
+        vg = nems_wrk2
+		
+        call copytogrdin(ug,tv(:,k))
+        call copytogrdin(vg, q(:,k))
+
+        ! Transformation to EFSOI relevant quantities
+        ! Mass component quantities
+        tv(:,k) = cptr * weight(:,k) * tv(:,k)
+		
+        ! Moisture component transormation to EFSOI
+        ! relevant quantities
+        q(:,k) = qweight * weight(:,k) * q(:,k)
+        tv_to_t(:,k) = ( one / (one + q(:,k)*0.61_r_kind) )
+      
+        ! Approximate the necessary transformation
+        ! of virtual temperature to temperature
+        tv(:,k) = tv(:,k) * tv_to_t(:,k)
+      
+        if (tv_ind > 0) grdin(:,levels(tv_ind-1)+k) = tv(:,k)
+        if (q_ind > 0) grdin(:,levels( q_ind-1)+k) =  q(:,k)
+
+     enddo
+	 
+  else if (use_gfs_ncio) then
+	  
+	  ! ==== Get U and V ========================================
+      call read_vardata(dset, 'ugrd', ug3d,errcode=iret)
+	  
+      if (iret /= 0) then
+         print *,'error reading ugrd'
+         call stop2(22)
+      endif
+	  
+      call read_vardata(dset, 'vgrd', vg3d,errcode=iret)
+      if (iret /= 0) then
+         print *,'error reading vgrd'
+         call stop2(23)
+      endif
+
+	  
+      do k=1,nlevs
+         ug = reshape(ug3d(:,:,nlevs-k+1),(/nlons*nlats/))
+         vg = reshape(vg3d(:,:,nlevs-k+1),(/nlons*nlats/))
+		 
+         if (u_ind > 0) call copytogrdin(ug,grdin(:,levels(u_ind-1) + k))
+         if (v_ind > 0) call copytogrdin(vg,grdin(:,levels(v_ind-1) + k))
+		 
+         ! Transformation to EFSOI relevant quantities
+         ! Assign weighted kinetic energy components. There
+         ! are no unit/metric differences for the kinetic component
+         grdin(:,levels(u_ind-1) + k) = weight(:,k) * grdin(:,levels(u_ind-1) + k)
+         grdin(:,levels(v_ind-1) + k) = weight(:,k) * grdin(:,levels(v_ind-1) + k)	 
+		 
+         ! calculate vertical integral of mass flux div (ps tendency)
+         ! this variable is analyzed in order to enforce mass balance in the analysis
+         !if (pst_ind > 0) then
+         !   ug = ug*(pressi(:,k)-pressi(:,k+1))
+         !   vg = vg*(pressi(:,k)-pressi(:,k+1))
+         !   call sptezv_s(divspec,vrtspec,ug,vg,-1) ! u,v to div,vrt
+         !   call sptez_s(divspec,vmassdiv(:,k),1) ! divspec to divgrd
+         !endif
+      enddo
+	  
+ 
+	  
+	  ! ==== Get T and Q ===========================================
+      call read_vardata(dset,'tmp', ug3d,errcode=iret)
+	  
+      if (iret /= 0) then
+         print *,'error reading tmp'
+         call stop2(24)
+      endif
+	  
+      call read_vardata(dset,'spfh', vg3d,errcode=iret)
+	  
+      if (iret /= 0) then
+         print *,'error reading spfh'
+         call stop2(25)
+      endif	   
+	  	  
+      do k=1,nlevs
+         ug = reshape(ug3d(:,:,nlevs-k+1),(/nlons*nlats/))
+         vg = reshape(vg3d(:,:,nlevs-k+1),(/nlons*nlats/))
+         
+		 !if (tsen_ind > 0) call copytogrdin(ug,grdin(:,levels(tsen_ind-1)+k,nb,ne))
+		 	 
+         call copytogrdin(vg, q(:,k))
+		 
+         ug = ug * ( 1.0 + fv*vg ) ! convert T to Tv
+         ! 
+         call copytogrdin(ug,tv(:,k))
+		 
+         ! Transformation to EFSOI relevant quantities
+         ! Mass component quantities
+         tv(:,k) = cptr * weight(:,k) * tv(:,k)
+         ! Moisture component transormation to EFSOI
+         ! relevant quantities
+         q(:,k) = qweight * weight(:,k) * q(:,k)
+         tv_to_t(:,k) = ( one / (one + q(:,k)*0.61_r_kind) )
+    
+         ! Approximate the necessary transformation
+         ! of virtual temperature to temperature
+         tv(:,k) = tv(:,k) * tv_to_t(:,k)
+		 		 
+         if (tv_ind > 0)   grdin(:,levels(tv_ind-1)+k) = tv(:,k)
+         if (q_ind > 0)    grdin(:,levels( q_ind-1)+k) =  q(:,k)
+      enddo
+	  
+      deallocate(ug3d,vg3d)
+	  
+  else
+	  
+	 ! $omp parallel do private(k,ug,vg,divspec,vrtspec) 
+	 ! shared(sigdata,pressi,vmassdiv,grdin,tv,q,cw,u_ind,v_ind,pst_ind,q_ind,tsen_ind,cw_ind,qi_ind,ql_ind)
+	 do k=1,nlevs
+
+	    vrtspec = sigdata%z(:,k); divspec = sigdata%d(:,k)
+	    call sptezv_s(divspec,vrtspec,ug,vg,1)
+	    if (u_ind > 0) then
+	       call copytogrdin(ug,grdin(:,levels(u_ind-1)+k))
+	    endif
+	    if (v_ind > 0) then
+	       call copytogrdin(vg,grdin(:,levels(v_ind-1)+k))
+	    endif
+
+	    ! calculate vertical integral of mass flux div (ps tendency)
+	    ! this variable is analyzed in order to enforce mass balance in the analysis
+	    !if (pst_ind > 0) then
+	    !   ug = ug*(pressi(:,k)-pressi(:,k+1))
+	    !   vg = vg*(pressi(:,k)-pressi(:,k+1))
+	    !   call sptezv_s(divspec,vrtspec,ug,vg,-1) ! u,v to div,vrt
+	    !   call sptez_s(divspec,vmassdiv(:,k),1) ! divspec to divgrd
+	    !endif
+
+	    divspec = sigdata%t(:,k)
+	    call sptez_s(divspec,ug,1)
+	    call copytogrdin(ug,tv(:,k))
+	    
+	    divspec = sigdata%q(:,k,1)
+	    call sptez_s(divspec,vg,1)
+	    call copytogrdin(vg,q(:,k))
+		
+        ! Transformation to EFSOI relevant quantities
+        ! Mass component quantities
+        tv(:,k) = cptr * weight(:,k) * tv(:,k)
+        ! Moisture component transormation to EFSOI
+        ! relevant quantities
+        q(:,k) = qweight * weight(:,k) * q(:,k)
+        tv_to_t(:,k) = ( one / (one + q(:,k)*0.61_r_kind) )
+   
+        ! Approximate the necessary transformation
+        ! of virtual temperature to temperature
+        tv(:,k) = tv(:,k) * tv_to_t(:,k)
+				
+		if (tv_ind > 0)          grdin(:,levels(tv_ind-1)+k) = tv(:,k)
+	    if (q_ind > 0)           grdin(:,levels( q_ind-1)+k) =  q(:,k)	  	  
+	  
+     enddo
+  endif
+  	  	  
+
+  ! =============================================================
+  ! surface pressure 
+  ! ================
+  if (ps_ind > 0) then
+    call copytogrdin(psg,grdin(:,levels(n3d) + ps_ind))
+  endif
+
+  ! Transformation to EFSOI relevant quantities
+  ! Surface pressure contribution
+  grdin(:,levels(n3d) + ps_ind) = rdtrpr * grweight(:) * 100._r_kind * grdin(:,levels(n3d) + ps_ind)
+  
+  deallocate(psg)
+ 
+  if (use_gfs_nemsio) call nemsio_close(gfile,iret=iret)
+  if (use_gfs_ncio)   call close_dataset(dset)
+
+  return
+ 
+  contains
+	  
+  ! copying to grdin (calling regtoreduced if reduced grid)
+  subroutine copytogrdin(field, grdin)
+  implicit none
+
+  real(r_kind), dimension(:), intent(in)      :: field
+  real(r_single), dimension(:), intent(inout) :: grdin
+
+  call regtoreduced(field, grdin)
+
+  end subroutine copytogrdin
+
+ end subroutine readgriddata_efsoi
+
+end module gridio_efsoi
diff --git a/util/EFSOI_Utilities/src/loadbal_efsoi.f90 b/util/EFSOI_Utilities/src/loadbal_efsoi.f90
new file mode 100644
index 0000000000..778468a469
--- /dev/null
+++ b/util/EFSOI_Utilities/src/loadbal_efsoi.f90
@@ -0,0 +1,380 @@
+module loadbal_efsoi
+!$$$  module documentation block
+!
+! module: loadbal          decompose ob priors and horizontal grid points
+!                          to minimize load imbalance. Creates various
+!                          arrays that define the decomposition.
+!
+! prgmmr: whitaker         org: esrl/psd               date: 2009-02-23
+!         groff            org: emc                    date: 2018-09-03
+! abstract: 
+!
+! Public Subroutines:
+!  load_balance_efsoi: set up decomposition (for ob priors and analysis grid points)
+!   that minimizes load imbalance.  
+!   The decomposition uses "Graham's rule", which simply
+!   stated, assigns each new work item to the task that currently has the 
+!   smallest load.
+!  loadbal_cleanup_efsoi: deallocate allocated arrays.
+!
+! Private Subroutines:
+!  estimate_work_efsoi: estimate work needed to update each analysis grid
+!   point (considering all the observations within the localization radius).
+! Public Variables (all defined by subroutine load_balance_efsoi):
+!  npts_min: (integer scalar) smallest number of grid points assigned to a task.
+!  npts_max: (integer scalar) maximum number of grid points assigned to a task.
+!  nobs_min: (integer scalar, serial enkf only) smallest number of observation priors assigned to a task.
+!  nobs_max: (integer scalar, serial enkf only) maximum number of observation priors assigned to a task.
+!  numproc: (integer scalar) total number of MPI tasks (from module mpisetup)
+!  nobstot: (integer scalar) total number of obs to be assimilated (from module
+!   enkf_obsmod).
+!  numobsperproc(numproc): (serial enkf only) integer array containing # of ob priors 
+!   assigned to each task.
+!  numptsperproc(numproc): integer array containing # of grid points assigned to
+!   each task.
+!  indxproc(numproc,npts_max): integer array with the indices (1,2,...npts) of 
+!   analysis grid points assigned to each task.
+!  indxproc_obs(numproc,nobs_max): (serial enkf only) integer array with the indices
+!   (1,2,...nobstot) of observation priors assigned to that task.
+!  iprocob(nobstot): (serial enkf only) integer array containing the task number that has been
+!   assigned to update each observation prior.
+!  indxob_chunk(nobstot): (serial enkf only) integer array that maps the index of the ob priors
+!   being assimilated (1,2,3...nobstot) to the index of the obs being 
+!   updated on that task (1,2,3,...numobsperproc(nproc)) - inverse of
+!   indxproc_obs.
+!  ensmean_obchunk(nobs_max): (serial enkf only) real array of ensemble mean observation priors
+!   being updated on that task (use indxproc_obs to find
+!   corresponding index in ensemble_ob).
+!  obloc_chunk(3,nobs_max): (serial enkf only) real array of spherical cartesian coordinates
+!   of ob priors being updated on this task.
+!  grdloc_chunk(3,npts_max): real array of spherical cartesian coordinates
+!   of analysis grid points being updated on this task.
+!  lnp_chunk(npts_max,ncdim): real array of log(pressures) of control variables
+!   being updated on this task.
+!  oblnp_chunk(nobs_max,ncdim): (serial enkf only) real array of log(pressures) of ob priors
+!   being updated on this task.
+!  obtime_chunk(nobs_max): (serial enkf only) real array of ob times of ob priors
+!   being updated on this task (expressed as an offset from the analysis time in
+!   hours).
+!  anal_obchunk_prior(nanals,nobs_max): (serial enkf only) real array of observation prior 
+!   ensemble perturbations to be updated on this task (not used in LETKF).
+!  kdtree_grid: pointer to kd-tree structure used for nearest neighbor searches
+!   for model grid points (only searches grid points assigned to this task).
+!  kdtree_obs: pointer to kd-tree structure used for nearest neighbor searches
+!   for observations (only searches ob locations assigned to this task).
+!  kdtree_obs2: (LETKF only) pointer to kd-tree structure used for nearest neighbor searches
+!   for observations (searches all observations)
+!  anal_chunk(nanals,npts_max,ncdim,nbackgrounds): real array of ensemble perturbations
+!   updated on each task.
+!  anal_chunk_prior(nanals,npts_max,ncdim,nbackgrounds): real array of prior ensemble
+!   perturbations.  Before analysis anal_chunk=anal_chunk_prior, after
+!   analysis anal_chunk contains posterior perturbations.
+!  ensmean_chunk(npts_max,ncdim,nbackgrounds): real array containing pieces of ensemble
+!   mean to be updated on each task.
+!  ensmean_chunk_prior(npts_max,ncdim,nbackgrounds): as above, for ensemble mean prior.
+!   Before analysis ensmean_chunk=ensmean_chunk_prior, after analysis
+!   ensmean_chunk contains posterior ensemble mean.
+!   
+!
+! Modules Used: mpisetup, params, kinds, constants, enkf_obsmod, gridinfo,
+!               kdtree_module, covlocal, controlvec
+!
+! program history log:
+!   2009-02-23  Initial version.
+!   2011-06-21  Added the option of observation box selection for LETKF.
+!   2015-07-25  Remove observation box selection (use kdtree instead).
+!   2016-05-02  shlyaeva: modification for reading state vector from table
+!   2016-09-07  shlyaeva: moved distribution of chunks here from controlvec
+!
+! attributes:
+!   language: f95
+!
+!$$$
+
+use mpisetup
+use params, only: datapath, nanals, simple_partition, &
+                  corrlengthnh, corrlengthsh, corrlengthtr, lupd_obspace_serial,&
+                  efsoi_flag
+use enkf_obsmod, only: nobstot, obloc, oblnp, ensmean_ob, obtime, anal_ob, corrlengthsq
+use kinds, only: r_kind, i_kind, r_double, r_single
+use kdtree2_module, only: kdtree2, kdtree2_create, kdtree2_destroy, &
+                          kdtree2_result, kdtree2_r_nearest
+use gridinfo, only: gridloc, logp, latsgrd, nlevs_pres, npts
+use constants, only: zero, rad2deg, deg2rad
+
+implicit none
+private
+public :: load_balance_efsoi, loadbal_cleanup_efsoi
+
+real(r_single),public, allocatable, dimension(:,:) :: lnp_chunk, &
+                                                      anal_obchunk_prior
+real(r_single),public, allocatable, dimension(:,:,:) :: anal_chunk, anal_chunk_prior
+real(r_single),public, allocatable, dimension(:,:) :: ensmean_chunk, ensmean_chunk_prior
+
+! arrays passed to kdtree2 routines need to be single
+real(r_single),public, allocatable, dimension(:,:) :: obloc_chunk, grdloc_chunk
+real(r_single),public, allocatable, dimension(:) :: oblnp_chunk, &
+ obtime_chunk, ensmean_obchunk
+integer(i_kind),public, allocatable, dimension(:) :: iprocob, indxob_chunk,&
+                          numptsperproc, numobsperproc
+integer(i_kind),public, allocatable, dimension(:,:) :: indxproc, indxproc_obs
+integer(i_kind),public :: npts_min, npts_max, nobs_min, nobs_max
+integer(8) totsize
+! kd-tree structures.
+type(kdtree2),public,pointer :: kdtree_obs, kdtree_grid, kdtree_obs2
+
+contains
+
+subroutine load_balance_efsoi()
+! set up decomposition (for analysis grid points, and ob priors in serial EnKF)
+! that minimizes load imbalance. 
+! Uses "Graham's rule", which simply
+! stated, assigns each new work item to the task that currently has the 
+! smallest load.
+implicit none
+integer(i_kind), allocatable, dimension(:) :: rtmp,numobs
+integer(i_kind) np,i,n,nn,nob1,nob2,ierr
+real(r_double) t1
+logical test_loadbal
+
+! partition state vector for using Grahams rule..
+! ("When a new job arrives, allocate it to the server 
+! that currently has the smallest load")
+allocate(numobs(npts))
+allocate(numptsperproc(numproc))
+allocate(rtmp(numproc))
+t1 = mpi_wtime()
+! assume work load proportional to number of 'nearby' obs
+call estimate_work_efsoi(numobs) ! fill numobs array with number of obs per horiz point
+! distribute the results of estimate_work to all processors.
+call mpi_allreduce(mpi_in_place,numobs,npts,mpi_integer,mpi_sum,mpi_comm_world,ierr)
+if (nproc == 0) print *,'time in estimate_work_efsoi = ',mpi_wtime()-t1,' secs'
+if (nproc == 0) print *,'min/max numobs',minval(numobs),maxval(numobs)
+! loop over horizontal grid points on analysis grid.
+t1 = mpi_wtime()
+rtmp = 0
+numptsperproc = 0
+np = 0
+test_loadbal = .false. ! simple partition for testing
+! 
+!PRINT *, npts, 'npts load bal'
+do n=1,npts
+   if (test_loadbal) then
+       ! use simple partition (does not use estimated workload) for testing
+       np = np + 1
+       if (np > numproc) np = 1
+   else
+       np = minloc(rtmp,dim=1)
+       ! np is processor with the fewest number of obs to process
+       ! add this grid point to list for nmin
+       rtmp(np) = rtmp(np)+numobs(n)
+   endif
+   numptsperproc(np) = numptsperproc(np)+1
+   !PRINT *, numptsperproc, 'loadbal nptsperpr'
+    
+
+
+end do
+
+
+npts_max = maxval(numptsperproc)
+npts_min = minval(numptsperproc)
+
+allocate(indxproc(numproc,npts_max))
+! indxproc(np,i) is i'th horiz grid index for processor np.
+! there are numptsperpoc(np) i values for processor np
+rtmp = 0
+numptsperproc = 0
+np = 0
+do n=1,npts
+   if (test_loadbal) then
+       ! use simple partition (does not use estimated workload) for testing
+       np = np + 1
+       if (np > numproc) np = 1
+   else
+       np = minloc(rtmp,dim=1)
+       rtmp(np) = rtmp(np)+numobs(n)
+   endif
+   numptsperproc(np) = numptsperproc(np)+1 ! recalculate
+   indxproc(np,numptsperproc(np)) = n
+end do
+
+!PRINT *, numptsperproc, 'second time'
+! print estimated workload for each task
+if (nproc == 0) then
+   do np=1,numproc
+      rtmp(np) = 0
+      do n=1,numptsperproc(np)
+         rtmp(np) = rtmp(np) + numobs(indxproc(np,n))
+      enddo
+	  
+  	  
+   enddo
+   
+   print *,'min/max estimated work ',&
+   minval(rtmp),maxval(rtmp)
+
+endif
+deallocate(rtmp,numobs)
+
+if (nproc == 0) then
+    print *,'npts = ',npts
+    print *,'min/max number of points per proc = ',npts_min,npts_max
+    print *,'time to do model space decomp = ',mpi_wtime()-t1
+end if
+
+! setup arrays to hold subsets of grid information for each task.
+allocate(grdloc_chunk(3,numptsperproc(nproc+1)))
+allocate(lnp_chunk(numptsperproc(nproc+1),nlevs_pres))
+do i=1,numptsperproc(nproc+1)
+   grdloc_chunk(:,i) = gridloc(:,indxproc(nproc+1,i))
+   do nn=1,nlevs_pres
+      lnp_chunk(i,nn) = logp(indxproc(nproc+1,i),nn)
+   end do
+end do
+
+allocate(numobsperproc(numproc))
+allocate(iprocob(nobstot))
+! default is to partition obs simply, since
+! speed up from using Graham's rule for observation process
+! often does not justify cost of estimating workload in ob space.
+
+! distribute obs without trying to estimate workload
+t1 = mpi_wtime()
+numobsperproc = 0
+np=0
+do n=1,nobstot
+   np=np+1
+   if(np > numproc)np = 1
+   numobsperproc(np) = numobsperproc(np)+1
+   iprocob(n) = np-1
+enddo
+nobs_min = minval(numobsperproc)
+nobs_max = maxval(numobsperproc)
+allocate(indxproc_obs(numproc,nobs_max))
+numobsperproc = 0
+do n=1,nobstot
+   np=iprocob(n)+1
+   numobsperproc(np) = numobsperproc(np)+1 ! recalculate
+   ! indxproc_obs(np,i) is i'th ob index for processor np.
+   ! there are numobsperpoc(np) i values for processor np
+   indxproc_obs(np,numobsperproc(np)) = n
+end do
+if (nproc == 0) then
+   print *,'nobstot = ',nobstot
+   print *,'min/max number of obs per proc = ',nobs_min,nobs_max
+   print *,'time to do ob space decomp = ',mpi_wtime()-t1
+end if
+! for serial enkf, send out observation priors to be updated on each processor.
+allocate(anal_obchunk_prior(nanals,nobs_max))
+if(nproc == 0) then
+   print *,'sending out observation prior ensemble perts from root ...'
+   totsize = nobstot
+   totsize = totsize*nanals
+   print *,'nobstot*nanals',totsize
+   t1 = mpi_wtime()
+   ! send one big message to each task.
+   do np=1,numproc-1
+      do nob1=1,numobsperproc(np+1)
+         nob2 = indxproc_obs(np+1,nob1)
+         anal_obchunk_prior(1:nanals,nob1) = anal_ob(1:nanals,nob2)
+      end do
+      call mpi_send(anal_obchunk_prior,nobs_max*nanals,mpi_real4,np, &
+           1,mpi_comm_world,ierr)
+   end do
+   ! anal_obchunk_prior on root (no send necessary)
+   do nob1=1,numobsperproc(1)
+      nob2 = indxproc_obs(1,nob1)
+      anal_obchunk_prior(1:nanals,nob1) = anal_ob(1:nanals,nob2)
+   end do
+      ! now we don't need anal_ob anymore for serial EnKF.
+      if (.not. lupd_obspace_serial) deallocate(anal_ob)
+else
+   ! recv one large message on each task.
+   call mpi_recv(anal_obchunk_prior,nobs_max*nanals,mpi_real4,0, &
+        1,mpi_comm_world,mpi_status,ierr)
+end if
+call mpi_barrier(mpi_comm_world, ierr)
+if(nproc == 0) print *,'... took ',mpi_wtime()-t1,' secs'
+! these arrays only needed for serial filter
+! nob1 is the index of the obs to be processed on this rank
+! nob2 maps nob1 to 1:nobstot array (nobx)
+allocate(obloc_chunk(3,numobsperproc(nproc+1)))
+allocate(oblnp_chunk(numobsperproc(nproc+1)))
+allocate(obtime_chunk(numobsperproc(nproc+1)))
+allocate(ensmean_obchunk(numobsperproc(nproc+1)))
+allocate(indxob_chunk(nobstot))
+indxob_chunk = -1
+do nob1=1,numobsperproc(nproc+1)
+   nob2 = indxproc_obs(nproc+1,nob1)
+   oblnp_chunk(nob1) = oblnp(nob2)
+   obtime_chunk(nob1) = obtime(nob2)
+   indxob_chunk(nob2) = nob1
+   ensmean_obchunk(nob1) = ensmean_ob(nob2)
+   obloc_chunk(:,nob1) = obloc(:,nob2)
+enddo
+! set up kd-tree for efsoi to search only the subset
+! of gridpoints, obs to be updated on this processor..
+if (numptsperproc(nproc+1) >= 3 .and. .not. lupd_obspace_serial .and. .not. efsoi_flag) then
+   kdtree_grid => kdtree2_create(grdloc_chunk,sort=.false.,rearrange=.true.)
+endif
+if (numobsperproc(nproc+1) >= 3) then
+   kdtree_obs  => kdtree2_create(obloc_chunk,sort=.false.,rearrange=.true.)
+end if
+
+end subroutine load_balance_efsoi
+
+subroutine estimate_work_efsoi(numobs)
+! estimate work needed to update each analysis grid
+! point (considering all the observations within the localization radius).
+use covlocal, only:  latval
+
+implicit none
+integer(i_kind), dimension(:), intent(inout) :: numobs
+real(r_single) :: deglat,corrlength,corrsq
+type(kdtree2_result),dimension(:),allocatable :: sresults
+
+integer nob,n1,n2,i,ideln
+
+ideln = int(real(npts)/real(numproc))
+n1 = 1 + nproc*ideln
+n2 = (nproc+1)*ideln
+if (nproc == numproc-1) n2 = npts
+
+! loop over 'good' obs.
+numobs = 1 ! set min # of obs to 1, not 0 (so single ob test behaves)
+!$omp parallel do  schedule(dynamic,1) private(nob,i,deglat,corrlength,sresults,corrsq)
+obsloop: do i=n1,n2
+       do nob=1,nobstot
+          if (sum((obloc(1:3,nob)-gridloc(1:3,i))**2,1) < corrlengthsq(nob)) &
+            numobs(i) = numobs(i) + 1
+       end do 
+end do obsloop
+!$omp end parallel do
+
+end subroutine estimate_work_efsoi
+
+subroutine loadbal_cleanup_efsoi()
+! deallocate module-level allocatable arrays
+if (allocated(anal_chunk)) deallocate(anal_chunk)
+if (allocated(anal_chunk_prior)) deallocate(anal_chunk_prior)
+if (allocated(ensmean_chunk)) deallocate(ensmean_chunk)
+if (allocated(ensmean_chunk_prior)) deallocate(ensmean_chunk_prior)
+if (allocated(obloc_chunk)) deallocate(obloc_chunk)
+if (allocated(grdloc_chunk)) deallocate(grdloc_chunk)
+if (allocated(lnp_chunk)) deallocate(lnp_chunk)
+if (allocated(oblnp_chunk)) deallocate(oblnp_chunk)
+if (allocated(obtime_chunk)) deallocate(obtime_chunk)
+if (allocated(ensmean_obchunk)) deallocate(ensmean_obchunk)
+if (allocated(iprocob)) deallocate(iprocob)
+if (allocated(indxob_chunk)) deallocate(indxob_chunk)
+if (allocated(indxproc_obs)) deallocate(indxproc_obs)
+if (allocated(numptsperproc)) deallocate(numptsperproc)
+if (allocated(numobsperproc)) deallocate(numobsperproc)
+if (allocated(indxproc)) deallocate(indxproc)
+if (associated(kdtree_obs)) call kdtree2_destroy(kdtree_obs)
+if (associated(kdtree_obs2)) call kdtree2_destroy(kdtree_obs2)
+if (associated(kdtree_grid)) call kdtree2_destroy(kdtree_grid)
+end subroutine loadbal_cleanup_efsoi
+
+end module loadbal_efsoi
diff --git a/util/EFSOI_Utilities/src/loc_advection.f90 b/util/EFSOI_Utilities/src/loc_advection.f90
new file mode 100644
index 0000000000..f81f4e0665
--- /dev/null
+++ b/util/EFSOI_Utilities/src/loc_advection.f90
@@ -0,0 +1,100 @@
+module loc_advection
+
+use mpisetup
+use gridinfo, only: latsgrd,lonsgrd,nlevs_pres,npts
+use statevec_efsoi, only: id_u,id_v
+use loadbal_efsoi, only: numptsperproc, indxproc, kdtree_grid
+use scatter_chunks_efsoi
+use kinds
+use params
+use constants
+use kdtree2_module
+implicit none
+
+private
+public loc_advection_efsoi, adloc_chunk
+real(r_single),allocatable,dimension(:,:) :: adloc_chunk
+
+contains
+
+subroutine loc_advection_efsoi
+!$$$  subprogram documentation block
+!                .      .    .                                       .
+! subprogram:    loc_advection
+!   prgmmr: ota
+!
+! abstract: computes analysis grid point location with simple horizontal
+!   advection.
+!
+! program history log:
+!   2011-09-16  ota    - created
+!
+!   input argument list:
+!
+!   output argument list:
+!
+! attributes:
+!   language: f90
+!   machine:
+!
+!$$$ end documentation block
+  implicit none
+  real(r_kind),allocatable,dimension(:) :: coslat
+  real(r_kind) :: adtime, halfpi, pi2, adlon, adlat
+  integer(i_kind) :: npt, nn, nnu, nnv, nnpt
+  allocate(adloc_chunk(3,numptsperproc(nproc+1)*nlevs_pres))
+  allocate(coslat(numptsperproc(nproc+1)))
+  adtime = adrate*real(eft,r_kind)*3600._r_kind/rearth
+  halfpi = half * pi
+  pi2 = 2._r_kind * pi
+!$omp parallel private(npt)
+!$omp do
+  do npt=1,numptsperproc(nproc+1)
+     coslat(npt) = 1._r_kind/cos(latsgrd(indxproc(nproc+1,npt)))
+  end do
+!$omp end do
+!$omp end parallel
+!$omp parallel private(nn,nnu,nnv,npt,nnpt,adlon,adlat)
+!$omp do
+  do nn=1,nlevs_pres
+     if(nn == nlevs_pres) then
+        nnu = id_u(1)
+        nnv = id_v(1)
+     else
+        nnu = id_u(nn)
+        nnv = id_v(nn)
+     end if
+     do npt=1,numptsperproc(nproc+1)
+        nnpt = nlevs_pres * (npt-1) + nn
+        adlon = lonsgrd(indxproc(nproc+1,npt)) &
+             & - half * (ensmean_chunk(npt,nnu) + analmean_chunk(npt,nnu)) &
+             & * coslat(npt) * adtime
+        adlat = latsgrd(indxproc(nproc+1,npt)) &
+             & - half * (ensmean_chunk(npt,nnv) + analmean_chunk(npt,nnv)) &
+             & * adtime
+        if(adlat > halfpi) then
+           adlat = pi - adlat
+           adlon = adlon + pi
+        else if(adlat < -halfpi) then
+           adlat = -pi - adlat
+           adlon = adlon + pi
+        end if
+        if(adlon > pi2) then
+           adlon = mod(adlon,pi2)
+        else if(adlon < zero) then
+           adlon = mod(adlon,pi2) + pi2
+        end if
+        adloc_chunk(1,nnpt) = cos(adlat)*cos(adlon)
+        adloc_chunk(2,nnpt) = cos(adlat)*sin(adlon)
+        adloc_chunk(3,nnpt) = sin(adlat)
+     end do
+  end do
+!$omp end do
+!$omp end parallel
+  deallocate(coslat)
+  ! kd-tree
+  kdtree_grid => kdtree2_create(adloc_chunk,sort=.false.,rearrange=.true.)
+  return
+end subroutine loc_advection_efsoi
+
+end module loc_advection
diff --git a/util/EFSOI_Utilities/src/scatter_chunks_efsoi.f90 b/util/EFSOI_Utilities/src/scatter_chunks_efsoi.f90
new file mode 100644
index 0000000000..aacb82347c
--- /dev/null
+++ b/util/EFSOI_Utilities/src/scatter_chunks_efsoi.f90
@@ -0,0 +1,153 @@
+module scatter_chunks_efsoi
+
+use mpisetup, only: numproc, nproc, mpi_real4
+use mpimod, only : mpi_comm_world
+use kinds, only: i_kind, r_single, r_kind
+use loadbal_efsoi, only: npts_max, numptsperproc, indxproc
+use params, only: nanals
+use gridio_efsoi
+
+! Fill in documentation here
+implicit none
+private
+public :: scatter_chunks_ob_impact
+real(r_kind),public, allocatable, dimension(:,:,:) :: anal_chunk 
+real(r_single),public, allocatable, dimension(:,:) :: ensmean_chunk 
+real(r_single),public, allocatable, dimension(:,:) :: fcerror_chunk, analmean_chunk
+contains
+
+subroutine scatter_chunks_ob_impact
+! distribute chunks from grdin (read in controlvec) according to
+! decomposition from load_balance
+use statevec_efsoi, only: grdin,grdin3,grdin5
+use gridio_efsoi, only: ncdim
+implicit none
+
+integer(i_kind), allocatable, dimension(:) :: scounts, displs, rcounts
+real(r_single), allocatable, dimension(:) :: sendbuf,recvbuf,sendbuf2,recvbuf2
+integer(i_kind) :: np, nn, n, nanal, i, ierr
+
+
+allocate(scounts(0:numproc-1))
+allocate(displs(0:numproc-1))
+allocate(rcounts(0:numproc-1))
+! only IO tasks send any data.
+! scounts is number of data elements to send to processor np.
+! rcounts is number of data elements to recv from processor np.
+! displs is displacement into send array for data to go to proc np
+do np=0,numproc-1
+   displs(np) = np*npts_max*ncdim
+enddo
+if (nproc <= nanals-1) then
+   scounts = npts_max*ncdim
+else
+   scounts = 0
+endif
+! displs is also the displacement into recv array for data to go into anal_chunk
+! on
+! task np.
+do np=0,numproc-1
+   if (np <= nanals-1) then
+      rcounts(np) = npts_max*ncdim
+   else
+      rcounts(np) = 0
+   end if
+enddo
+
+! allocate array to hold pieces of state vector on each proc.
+allocate(anal_chunk(nanals,npts_max,ncdim))
+if (nproc == 0) print *,'anal_chunk size = ',size(anal_chunk)
+
+allocate(ensmean_chunk(npts_max,ncdim))
+ensmean_chunk = 0.
+allocate(sendbuf(numproc*npts_max*ncdim))
+allocate(recvbuf(numproc*npts_max*ncdim))
+
+! send and receive buffers.
+if (nproc <= nanals-1) then
+   ! fill up send buffer.
+   do np=1,numproc
+     do nn=1,ncdim
+      do i=1,numptsperproc(np)
+       n = ((np-1)*ncdim + (nn-1))*npts_max + i
+       sendbuf(n) = grdin(indxproc(np,i),nn)
+     enddo
+    enddo
+   enddo
+end if
+
+call mpi_alltoallv(sendbuf, scounts, displs, mpi_real4, recvbuf,rcounts, displs,&
+                   mpi_real4, mpi_comm_world, ierr)
+
+!==> compute ensemble of first guesses on each task, remove mean from anal.
+!$omp parallel do schedule(dynamic,1)  private(nn,i,nanal,n)
+do nn=1,ncdim
+   do i=1,numptsperproc(nproc+1)
+	   
+      do nanal=1,nanals
+         n = ((nanal-1)*ncdim + (nn-1))*npts_max + i
+         anal_chunk(nanal,i,nn) = recvbuf(n)
+      enddo
+	  
+      ensmean_chunk(i,nn) = sum(anal_chunk(:,i,nn))/float(nanals)
+	  
+      ! remove mean from ensemble.
+      do nanal=1,nanals
+         anal_chunk(nanal,i,nn) = anal_chunk(nanal,i,nn)-ensmean_chunk(i,nn)
+      end do
+	  
+   end do
+end do
+!$omp end parallel do
+
+deallocate(sendbuf, recvbuf)
+
+! Get forecast error at the evaluation time and distribute them to all
+! processors
+allocate(fcerror_chunk(npts_max,ncdim))
+allocate(analmean_chunk(npts_max,ncdim))
+allocate(sendbuf(numproc*npts_max*ncdim))
+allocate(sendbuf2(numproc*npts_max*ncdim))
+allocate(recvbuf(npts_max*ncdim))
+allocate(recvbuf2(npts_max*ncdim))
+if(nproc == 0) then
+   ! Assign sendbuf recbuf from state variables
+   do np=1,numproc
+      do nn=1,ncdim
+         do i=1,numptsperproc(np)
+            n = ((np-1)*ncdim + (nn-1))*npts_max + i
+            sendbuf(n) = grdin5(indxproc(np,i),nn)
+            sendbuf2(n) = grdin3(indxproc(np,i),nn)
+         end do
+      end do
+   end do
+   call mpi_scatter(sendbuf,ncdim*npts_max,mpi_real4,recvbuf, &
+        & ncdim*npts_max,mpi_real4,0,mpi_comm_world,ierr)
+   call mpi_scatter(sendbuf2,ncdim*npts_max,mpi_real4,recvbuf2, &
+        & ncdim*npts_max,mpi_real4,0,mpi_comm_world,ierr)
+else
+   call mpi_scatter(sendbuf,ncdim*npts_max,mpi_real4,recvbuf, &
+        & ncdim*npts_max,mpi_real4,0,mpi_comm_world,ierr)
+   call mpi_scatter(sendbuf2,ncdim*npts_max,mpi_real4,recvbuf2, &
+        & ncdim*npts_max,mpi_real4,0,mpi_comm_world,ierr)
+end if
+
+!$omp parallel do schedule(dynamic,1)  private(nn,i,n) 
+
+   do nn=1,ncdim
+      do i=1,numptsperproc(nproc+1)
+         n = (nn-1)*npts_max + i
+         fcerror_chunk(i,nn) = recvbuf2(n)
+         analmean_chunk(i,nn) = recvbuf(n)
+      end do
+   end do
+   deallocate(sendbuf,recvbuf,sendbuf2,recvbuf2)
+   if(allocated(grdin5)) deallocate(grdin5)
+   call divide_weight(analmean_chunk)
+   call divide_weight(ensmean_chunk(:,:))
+
+call destroy_weight()
+
+end subroutine scatter_chunks_ob_impact
+
+end module scatter_chunks_efsoi
diff --git a/util/EFSOI_Utilities/src/statevec_efsoi.f90 b/util/EFSOI_Utilities/src/statevec_efsoi.f90
new file mode 100644
index 0000000000..9a6af47f7e
--- /dev/null
+++ b/util/EFSOI_Utilities/src/statevec_efsoi.f90
@@ -0,0 +1,319 @@
+module statevec_efsoi
+!$$$  module documentation block
+!
+! module: statevec_efsoi       read ensemble members, ensemble mean forecasts,
+!                              ensemble mean analysis and verification. 
+!                              Assign and compute forecast perturbations
+!                              and ensemble mean forecast errors from 
+!                              information read
+!                              
+! prgmmr: whitaker         org: esrl/psd               date: 2009-02-23
+! prgmmr: groff            org: emc                    date: 2018-05-14
+!
+! abstract: io needed for efsoi calculations
+!
+! Public Subroutines:
+!  init_statevec_efsoi: read anavinfo table for defining EFSOI state vector
+!  read_state_efsoi:    read ensemble members, ensemble mean forecasts, 
+!                       ensemble mean analyses and verification.  Assign 
+!                       and compute forecast perturbations and forecast errors 
+!                       from information read.
+!  statevec_efsoi_cleanup: deallocate allocatable arrays.
+!
+! Public Variables:
+!  nanals: (integer scalar) number of ensemble members (from module params)
+!  nlevs: number of analysis vertical levels (from module params).
+!
+!  nc3d: number of 3D control variables
+!  nc2d: number of 2D control variables
+!  cvars3d: names of 3D control variables
+!  cvars2d: names of 2D control variables
+!  ncdim: total number of 2D fields to update (nc3d*nlevs+nc2d)
+!  index_pres: an index array with pressure value for given state variable
+!   
+! Modules Used: mpisetup, params, kinds, gridio, gridinfo_efsoi, mpeu_util, constants
+!
+! program history log:
+!   2009-02-23  Initial version (as statevec).
+!   2009-11-28  revamped to improve IO speed
+!   2015-06-29  add multiple time levels to background
+!   2016-05-02  shlyaeva: Modification for reading state vector from table
+!   2016-09-07  shlyaeva: moved distribution of ens members to loadbal 
+!   2016-11-29  shlyaeva: module renamed to controlvec (from statevec); gridinfo_efsoi
+!               init and cleanup are called from here now
+!   2018-05-14  Groff: Adapted from enkf controlvec.f90 to provide
+!               io functionality necessary for efsoi calculations
+!   2021-03-04  Eichmann: updated to work with FV3 GFS
+!   2021-03-15  Eichmann: eliminated gridinfo_efsoi for src/enkf version 
+!
+! attributes:
+!   language: f95
+!
+!$$$
+
+use mpisetup, only: numproc,nproc,mpi_wtime
+use mpimod, only: mpi_comm_world
+use gridio_efsoi,    only: readgriddata_efsoi, get_weight, ncdim
+use gridinfo,  only: getgridinfo, gridinfo_cleanup,              &
+                     npts, vars3d_supported, vars2d_supported
+use params,    only: nlevs, fgfileprefixes, reducedgrid, &
+                     nanals, nlons, nlats, read_member_forecasts, &
+                     read_verification, read_ensmean_forecast, &
+                     read_analysis_mean, eft, andataname, &
+                     datehr, forecast_impact, gdatehr
+use kinds,     only: r_kind, i_kind, r_double, r_single
+use mpeu_util, only: gettablesize, gettable, getindex
+use constants, only: max_varname_length
+implicit none
+
+private
+
+public :: read_state_efsoi, statevec_cleanup_efsoi, init_statevec_efsoi
+real(r_single), public, allocatable, dimension(:,:) :: grdin, grdin2, grdin3, grdin4, grdin5
+
+integer(i_kind), public :: nc2d, nc3d
+character(len=max_varname_length), allocatable, dimension(:), public :: cvars3d
+character(len=max_varname_length), allocatable, dimension(:), public :: cvars2d
+integer(i_kind), public, allocatable, dimension(:) :: index_pres
+integer(i_kind), public, allocatable, dimension(:) :: clevels
+integer(i_kind),public, allocatable, dimension(:) :: id_u, id_v, id_t, id_q
+integer(i_kind),public :: id_ps
+contains
+
+subroutine init_statevec_efsoi()
+! read table with state vector variables for efsoi
+! (code adapted from GSI state_vectors.f90 init_anasv routine
+implicit none
+character(len=*),parameter:: rcname='anavinfo'
+character(len=*),parameter:: tbname='state_vector_efsoi::'
+character(len=256),allocatable,dimension(:):: utable
+character(len=20) var,source,funcof
+integer(i_kind) luin,ii,i,ntot, k, nvars
+integer(i_kind) ilev, itracer
+integer(i_kind) u_ind,v_ind,tv_ind,q_ind,ps_ind
+
+! load file
+luin=914
+open(luin,file=rcname,form='formatted')
+
+! Scan file for desired table first
+! and get size of table
+call gettablesize(tbname,luin,ntot,nvars)
+
+! Get contents of table
+allocate(utable(nvars))
+call gettable(tbname,luin,ntot,nvars,utable)
+
+! release file unit
+close(luin)
+
+! Retrieve each token of interest from table and define
+! variables participating in control vector
+
+! Count variables first
+nc2d=0; nc3d=0; ncdim=0;
+do ii=1,nvars
+   read(utable(ii),*) var, ilev, itracer, source, funcof
+   if(ilev==1) then
+       nc2d=nc2d+1
+       ncdim=ncdim+1
+   else
+       nc3d=nc3d+1
+       ncdim=ncdim+ilev
+   endif
+enddo
+
+allocate(cvars3d(nc3d),cvars2d(nc2d),clevels(0:nc3d))
+
+! Now load information from table
+nc2d=0;nc3d=0
+clevels = 0
+do ii=1,nvars
+   read(utable(ii),*) var, ilev, itracer, source, funcof
+   if(ilev==1) then
+      nc2d=nc2d+1
+      cvars2d(nc2d)=trim(adjustl(var))
+   else if (ilev==nlevs .or. ilev==nlevs+1) then
+      nc3d=nc3d+1
+      cvars3d(nc3d) = trim(adjustl(var))
+      clevels(nc3d) = ilev + clevels(nc3d-1)
+   else
+      if (nproc .eq. 0) print *,'Error: only ', nlevs, ' and ', nlevs+1,' number of levels is supported in current version, got ',ilev
+      call stop2(503)
+   endif
+enddo
+
+deallocate(utable)
+
+allocate(index_pres(ncdim))
+ii=0
+do i=1,nc3d
+  do k=1,clevels(i)-clevels(i-1)
+    ii = ii + 1
+    index_pres(ii)=k
+  end do
+end do
+do i = 1,nc2d
+  ii = ii + 1
+  index_pres(ii) = nlevs+1
+enddo
+
+! sanity checks
+if (ncdim == 0) then
+  if (nproc == 0) print *, 'Error: there are no variables to update.'
+  call stop2(501)
+endif
+
+do i = 1, nc2d
+  if (getindex(vars2d_supported, cvars2d(i))<0) then
+    if (nproc .eq. 0) then
+      print *,'Error: 2D variable ', cvars2d(i), ' is not supported in current version.'
+      print *,'Supported variables: ', vars2d_supported
+    endif
+    call stop2(502)
+  endif
+enddo
+do i = 1, nc3d
+  if (getindex(vars3d_supported, cvars3d(i))<0) then
+    if (nproc .eq. 0) then 
+       print *,'Error: 3D variable ', cvars3d(i), ' is not supported in current version.'
+       print *,'Supported variables: ', vars3d_supported
+    endif
+    call stop2(502)
+  endif
+enddo
+
+if (nproc == 0) then 
+  print *, '2D control variables: ', cvars2d
+  print *, '3D control variables: ', cvars3d
+  print *, 'Control levels: ', clevels
+  print *, 'nc2d: ', nc2d, ', nc3d: ', nc3d, ', ncdim: ', ncdim
+endif
+
+
+call getgridinfo(fgfileprefixes(1), reducedgrid)
+
+
+! Identify EFSOI relevant state variable indices
+u_ind   = getindex(vars3d_supported, 'u')   !< indices in the state var arrays
+v_ind   = getindex(vars3d_supported, 'v')   ! U and V (3D)
+tv_ind  = getindex(vars3d_supported, 'tv')  ! Tv (3D)
+q_ind   = getindex(vars3d_supported, 'q')   ! Q (3D)
+ps_ind  = getindex(vars2d_supported, 'ps')  ! Ps (2D)
+
+! Index of each elements
+allocate(id_u(nlevs))
+allocate(id_v(nlevs))
+allocate(id_t(nlevs))
+allocate(id_q(nlevs))
+do k=1,nlevs
+   id_u(k) = clevels(u_ind-1) + k
+   id_v(k) = clevels(v_ind-1) + k
+   id_t(k) = clevels(tv_ind-1) + k
+   id_q(k) = clevels(q_ind-1) + k
+end do
+
+id_ps = clevels(nc3d) + ps_ind
+
+! Get grid weights for EFSOI
+! calculation and evaluation
+call get_weight()
+
+end subroutine init_statevec_efsoi
+
+! ====================================================================
+subroutine read_state_efsoi()
+! read ensemble members on IO tasks
+implicit none
+real(r_double)  :: t1,t2
+integer(i_kind) :: nanal
+integer(i_kind) :: ierr
+
+if (numproc < nanals) then
+   print *,'need at least nanals =',nanals,'MPI tasks,'
+   print *,'have numproc=',numproc,', exiting ...'
+   call mpi_barrier(mpi_comm_world,ierr)
+   call mpi_finalize(ierr)
+end if
+
+if (npts < numproc) then
+   print *,'cannot allocate more than npts =',npts,'MPI tasks, exiting ...'
+   call mpi_barrier(mpi_comm_world,ierr)
+   call mpi_finalize(ierr)
+end if
+
+! read in whole control vector on i/o procs - keep in memory
+! (needed in write_ensemble)
+if (nproc <= nanals-1) then
+	
+   allocate(grdin(npts,ncdim))
+   allocate(grdin2(npts,ncdim))
+   allocate(grdin3(npts,ncdim))
+   allocate(grdin4(npts,ncdim))
+   allocate(grdin5(npts,ncdim))
+   
+   nanal = nproc + 1
+   t1 = mpi_wtime()
+   
+   
+   ! Read ensemble member forecasts needed to obtain
+   ! the forecast perturbations at evaluation forecast time (EFT)
+   call readgriddata_efsoi(cvars3d,cvars2d,nc3d,nc2d,clevels,ncdim,grdin,read_member_forecasts,nanal=nanal,ft=eft,hr=datehr) 
+
+
+   if (nproc == 0) then
+      t2 = mpi_wtime()
+      print *,'time in readgridata on root',t2-t1,'secs'
+   end if
+   
+   if (nproc==0) then
+      ! Read ensemble mean forecast from analysis
+      if(forecast_impact) call readgriddata_efsoi(cvars3d,cvars2d,nc3d,nc2d,clevels,ncdim,grdin2, &
+                                  read_ensmean_forecast,nanal=0,ft=eft,hr=datehr)
+								  
+      ! Read ensemble mean forecast from first guess
+      call readgriddata_efsoi(cvars3d,cvars2d,nc3d,nc2d,clevels,ncdim,grdin3,  &
+              read_ensmean_forecast,nanal=0,ft=eft+6,hr=gdatehr)
+			  
+      ! Compute One half the sum of ensemble mean forecast quantities
+      grdin3 = 0.5_r_kind * (grdin3 + grdin2)
+	  
+      ! Verification at evaluation time
+      call readgriddata_efsoi(cvars3d,cvars2d,nc3d,nc2d,clevels,ncdim,grdin4, &
+              read_verification,infilename=andataname)
+			  
+      ! [(0.5*(e_f + e_g)) / (nanals - 1)]
+      grdin3 = (grdin3 - grdin4) / real(nanals-1,r_kind)
+      ! Normalize for surface pressure ----- (This needs to be corrected) -----
+      ! AFE trying making this like EXP-efso_fv3-CWB (which has the line
+      ! ommented - note reads: Delete the normalization of surface pressure;
+      ! The calculation of (e_t|0 + e_t|-6) do not need normalization though
+      ! dimensional analysis.
+      !grdin3(:,ncdim) = grdin3(:,ncdim) / grdin4(:,3)
+      ! Read ensemble mean analysis, used in evolving localization     
+      if(forecast_impact) call readgriddata_efsoi(cvars3d,cvars2d,nc3d,nc2d,clevels,ncdim,grdin5, &
+                                  read_analysis_mean,nanal=0,ft=0,hr=datehr)
+   end if
+end if
+   
+end subroutine read_state_efsoi
+
+subroutine statevec_cleanup_efsoi()
+! deallocate module-level allocatable arrays.
+if (allocated(cvars3d)) deallocate(cvars3d)
+if (allocated(cvars2d)) deallocate(cvars2d)
+if (allocated(clevels)) deallocate(clevels)
+if (allocated(index_pres)) deallocate(index_pres)
+if (nproc <= nanals-1 .and. allocated(grdin)) deallocate(grdin)
+if (nproc <= nanals-1 .and. allocated(grdin2)) deallocate(grdin2)
+if (nproc <= nanals-1 .and. allocated(grdin3)) deallocate(grdin3)
+if (nproc <= nanals-1 .and. allocated(grdin4)) deallocate(grdin4)
+if (nproc <= nanals-1 .and. allocated(grdin5)) deallocate(grdin5)
+call gridinfo_cleanup()
+if (allocated(id_u)) deallocate(id_u)
+if (allocated(id_v)) deallocate(id_v)
+if (allocated(id_t)) deallocate(id_t)
+if (allocated(id_q)) deallocate(id_q)
+end subroutine statevec_cleanup_efsoi
+
+end module statevec_efsoi