Skip to content

Commit

Permalink
Merge pull request #21 from ec-jrc/development
Browse files Browse the repository at this point in the history
Added full correlation distance decay (CDD) interpolation method, using a map of CDD values.
Speeds up angular distance weighted (ADW) interpolation by using numba.
Added option to split computation in sub-regions to save memory
  • Loading branch information
doc78 authored Apr 9, 2024
2 parents e82e448 + 862730e commit e42744a
Show file tree
Hide file tree
Showing 13 changed files with 661 additions and 136 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci_env.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
run: |
conda list
pip install poppler-utils
conda install 'poppler=>22.10.0'
conda install -c conda-forge 'poppler=>22.10.0'
- name: Check installation
shell: bash -el {0}
run: |
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
/build
pyg2p.egg-info
/dist

*.nbc
*.nbi
/.python-version
.coverage
.coverage.*
34 changes: 31 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -670,16 +670,44 @@ Attributes p, leafsize and eps for the kd tree algorithm are default in scipy li
| leafsize | 10 |

#### ADW
It's the Angular Distance Weighted (ADW) algorithm with scipy.kd_tree, using 4 neighbours.
If @adw_broadcasting is set to true, computations will run in full broadcasting mode but requires more memory
It's the Angular Distance Weighted (ADW) algorithm by Shepard et al. 1968, with scipy.kd_tree using 11 neighbours.
If @use_broadcasting is set to true, computations will run in full broadcasting mode but requires more memory
If @num_of_splits is set to any number, computations will be split on subset and then recollected into the final map, to save mamory (do not set it if you have enought memory to run interpolation)

```json
{
"Interpolation": {
"@latMap": "/dataset/maps/europe5km/lat.map",
"@lonMap": "/dataset/maps/europe5km/long.map",
"@mode": "adw",
"@adw_broadcasting": false}
"@use_broadcasting": false,
"@num_of_splits": 10}
}
```

#### CDD
It's the Correlation Distance Decay (CDD) modified implementation of the Angular Distance Weighted algorithm, with scipy.kd_tree using 11 neighbours. It needs a map of CDD values for each point, to be specified in the field @cdd_map
@cdd_mode can be one of the following values: "Hofstra", "NewEtAl" or "MixHofstraShepard"
In case of mode "MixHofstraShepard", @cdd_options allows to customize the parameters of Hofstra and Shepard algorithm ("weights_mode": can be "All" or "OnlyTOP10" to take 10 higher values only in the interpolation of each point).
If @use_broadcasting is set to true, computations will run in full broadcasting mode but requires more memory
If @num_of_splits is set to any number, computations will be split on subset and then recollected into the final map, to save mamory (do not set it if you have enought memory to run interpolation)

```json
{
"Interpolation": {
"@latMap": "/dataset/maps/europe5km/lat.map",
"@lonMap": "/dataset/maps/europe5km/long.map",
"@mode": "cdd",
"@cdd_map": "/dataset/maps/europe5km/cdd_map.nc",
"@cdd_mode": "MixHofstraShepard",
"@cdd_options": {
"m_const": 4,
"min_num_of_station": 4,
"radius_ratio": 0.3333333333333333,
"weights_mode": "All"
},
"@use_broadcasting": false,
"@num_of_splits": 10}
}
```

Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
netCDF4>=1.5.3
netCDF4>=1.5.3,<=1.6.4
ujson>=5.4.0
numpy>=1.18.2
scipy>=1.4.1
numba>=0.54.0
scipy>=1.6.0
# GDAL
numexpr>=2.7.1
importlib-metadata<5.0.0
Expand Down
2 changes: 1 addition & 1 deletion src/pyg2p/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.6
3.2.7
4 changes: 4 additions & 0 deletions src/pyg2p/main/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ def _define_exec_params(self):
self._vars['outMaps.clone'] = self.api_conf['OutMaps']['cloneMap']
interpolation_conf = self.api_conf['OutMaps']['Interpolation']
self._vars['interpolation.mode'] = interpolation_conf.get('mode', self.default_values['interpolation.mode'])
self._vars['interpolation.cdd_map'] = interpolation_conf.get('cdd_map', '')
self._vars['interpolation.cdd_mode'] = interpolation_conf.get('cdd_mode', '')
self._vars['interpolation.cdd_options'] = interpolation_conf.get('cdd_options', None)
self._vars['interpolation.use_broadcasting'] = interpolation_conf.get('use_broadcasting', False)
self._vars['interpolation.num_of_splits'] = interpolation_conf.get('num_of_splits', None)
self._vars['interpolation.rotated_target'] = interpolation_conf.get('rotated_target', False)
if not self._vars['interpolation.dir'] and self.api_conf.get('intertableDir'):
self._vars['interpolation.dirs']['user'] = self.api_conf['intertableDir']
Expand Down
6 changes: 5 additions & 1 deletion src/pyg2p/main/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,11 @@ def _define_exec_params(self):
self._vars['outMaps.clone'] = exec_conf['OutMaps']['@cloneMap']
interpolation_conf = exec_conf['OutMaps']['Interpolation']
self._vars['interpolation.mode'] = interpolation_conf.get('@mode', self.default_values['interpolation.mode'])
self._vars['interpolation.adw_broadcasting'] = interpolation_conf.get('@adw_broadcasting', False)
self._vars['interpolation.cdd_map'] = interpolation_conf.get('@cdd_map', '')
self._vars['interpolation.cdd_mode'] = interpolation_conf.get('@cdd_mode', '')
self._vars['interpolation.cdd_options'] = interpolation_conf.get('@cdd_options', None)
self._vars['interpolation.use_broadcasting'] = interpolation_conf.get('@use_broadcasting', False)
self._vars['interpolation.num_of_splits'] = interpolation_conf.get('@num_of_splits', None)
self._vars['interpolation.rotated_target'] = interpolation_conf.get('@rotated_target', False)
if not self._vars['interpolation.dir'] and interpolation_conf.get('@intertableDir'):
# get from JSON
Expand Down
42 changes: 35 additions & 7 deletions src/pyg2p/main/interpolation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
class Interpolator(Loggable):
_LOADED_INTERTABLES = {}
_prefix = 'I'
scipy_modes_nnear = {'nearest': 1, 'invdist': 4, 'adw': 4, 'cdd': 4, 'bilinear': 4, 'triangulation': 3, 'bilinear_delaunay': 4}
scipy_modes_nnear = {'nearest': 1, 'invdist': 4, 'adw': 11, 'cdd': 11, 'bilinear': 4, 'triangulation': 3, 'bilinear_delaunay': 4}
suffixes = {'grib_nearest': 'grib_nearest', 'grib_invdist': 'grib_invdist',
'nearest': 'scipy_nearest', 'invdist': 'scipy_invdist', 'adw': 'scipy_adw', 'cdd': 'scipy_cdd',
'bilinear': 'scipy_bilinear', 'triangulation': 'scipy_triangulation', 'bilinear_delaunay': 'scipy_bilinear_delaunay'}
Expand All @@ -31,7 +31,11 @@ def __init__(self, exec_ctx, mv_input):
self._mv_grib = mv_input
self.interpolate_with_grib = exec_ctx.is_with_grib_interpolation
self._mode = exec_ctx.get('interpolation.mode')
self._adw_broadcasting = exec_ctx.get('interpolation.adw_broadcasting')
self._cdd_map = exec_ctx.get('interpolation.cdd_map')
self._cdd_mode = exec_ctx.get('interpolation.cdd_mode')
self._cdd_options = exec_ctx.get('interpolation.cdd_options')
self._use_broadcasting = exec_ctx.get('interpolation.use_broadcasting')
self._num_of_splits = exec_ctx.get('interpolation.num_of_splits')
self._source_filename = pyg2p.util.files.filename(exec_ctx.get('input.file'))
self._suffix = self.suffixes[self._mode]
self._intertable_dirs = exec_ctx.get('interpolation.dirs')
Expand Down Expand Up @@ -167,6 +171,10 @@ def interpolate_scipy(self, latgrib, longrib, v, grid_id, grid_details=None):
# Global_3arcmin DEBUG
latefas=self._target_coords.lats[1800-int(DEBUG_MAX_LAT*20):1800-int(DEBUG_MIN_LAT*20), 3600+int(DEBUG_MIN_LON*20):3600+int(DEBUG_MAX_LON*20)]
lonefas=self._target_coords.lons[1800-int(DEBUG_MAX_LAT*20):1800-int(DEBUG_MIN_LAT*20), 3600+int(DEBUG_MIN_LON*20):3600+int(DEBUG_MAX_LON*20)]
#latefas-=0.008369999999992217
# lonefas-=0.00851999999999431
#lonefas-=0.024519999999977227

else:
# European_1arcmin DEBUG
selection_lats = np.logical_and(self._target_coords.lats[:,0]>=DEBUG_MIN_LAT,self._target_coords.lats[:,0]<=DEBUG_MAX_LAT)
Expand Down Expand Up @@ -194,9 +202,18 @@ def interpolate_scipy(self, latgrib, longrib, v, grid_id, grid_details=None):
# latgrib = np.array([ 7.39050803, 8.72151493, 7.82210701, 7.35906546])
# longrib = np.array([49.16690015, 48.11557968, 48.27217824, 49.70238655])
# v = np.array([100, 133, 166, 200 ])
latgrib = np.array([ 8, 8, 8, 8])
longrib = np.array([45, 48.5, 49, 50])
v = np.array([200, 100, 100, 100 ])
# latgrib = np.array([ 8, 8, 8, 8])
# longrib = np.array([45, 48.5, 49, 50])
# v = np.array([200, 100, 100, 100 ])

# OR load data points for the TEST from file
#data = np.genfromtxt('/media/sf_VMSharedFolder/pyg2p_adw_cdd_test/pr199106180600_idw.txt', delimiter='\t', skip_header=1)
#data = np.genfromtxt('/media/sf_VMSharedFolder/pyg2p_adw_cdd_test/pr199106170600_20230714101901.txt', delimiter='\t', skip_header=1)
data = np.genfromtxt('/media/sf_VMSharedFolder/test_split/tn202401010600_20240213140643.txt', delimiter='\t', skip_header=1)
longrib = data[:,0]
latgrib = data[:,1]
v = data[:,2]

intertable_id, intertable_name = 'DEBUG_ADW','DEBUG_ADW.npy'

nnear = self.scipy_modes_nnear[self._mode]
Expand All @@ -212,11 +229,22 @@ def interpolate_scipy(self, latgrib, longrib, v, grid_id, grid_details=None):
self._log('Trying to interpolate without grib lat/lons. Probably a malformed grib!', 'ERROR')
raise ApplicationException.get_exc(5000)

# CR: to use float32 computations uncomment here:
# longrib=np.float32(longrib)
# latgrib=np.float32(latgrib)
# lonefas=np.float32(lonefas)
# latefas=np.float32(latefas)
# v=np.float32(v)
# self.mv_out=np.float32(self.mv_out)

self._log('\nInterpolating table not found\n Id: {}\nWill create file: {}'.format(intertable_id, intertable_name), 'WARN')
scipy_interpolation = ScipyInterpolation(longrib, latgrib, grid_details, v.ravel(), nnear, self.mv_out,
self._mv_grib, target_is_rotated=self._rotated_target_grid,
parallel=self.parallel, mode=self._mode, use_broadcasting=self._adw_broadcasting)
_, weights, indexes = scipy_interpolation.interpolate(lonefas, latefas)
parallel=self.parallel, mode=self._mode,
cdd_map=self._cdd_map, cdd_mode=self._cdd_mode, cdd_options = self._cdd_options,
use_broadcasting=self._use_broadcasting,
num_of_splits=self._num_of_splits)
_, weights, indexes = scipy_interpolation.interpolate(lonefas, latefas)
result = self._interpolate_scipy_append_mv(v, weights, indexes, nnear)

# saving interpolation lookup table
Expand Down
Loading

0 comments on commit e42744a

Please sign in to comment.