Skip to content

Commit

Permalink
added mlclouds col chunk and max workers to template configs
Browse files Browse the repository at this point in the history
  • Loading branch information
grantbuster committed Mar 11, 2022
1 parent 9f60431 commit a31e5c8
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
4 changes: 3 additions & 1 deletion nsrdb/config/templates/config_nsrdb_post2017.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
"walltime": 4
},
"ml-cloud-fill": {
"fill_all": false
"col_chunk": 10000,
"fill_all": false,
"max_workers": null
}
}
4 changes: 3 additions & 1 deletion nsrdb/config/templates/config_nsrdb_pre2018.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@
"walltime": 4
},
"ml-cloud-fill": {
"fill_all": false
"col_chunk": 10000,
"fill_all": false,
"max_workers": null
}
}
13 changes: 10 additions & 3 deletions nsrdb/gap_fill/mlclouds_fill.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,21 +1008,28 @@ def run(cls, h5_source, fill_all=False, model_path=None, var_meta=None,
.format(h5_source))
logger.info('Running MLCloudsFill with model: {}'
.format(model_path))
logger.info('Running MLCloudsFill with col_chunk: {}'
.format(col_chunk))
obj = cls(h5_source, fill_all=fill_all, model_path=model_path,
var_meta=var_meta)
obj.archive_cld_properties()
clean_data, fill_flag = obj._init_clean_arrays()

if col_chunk is None:
slices = [slice(None)]
logger.info('MLClouds gap fill is being run without col_chunk for '
'full data shape {} all on one process. If you see '
'memory errors, try setting the col_chunk input to '
'distribute the problem across multiple small workers.'
.format(obj._res_shape))
else:
columns = np.arange(obj._res_shape[1])
N = np.ceil(len(columns) / col_chunk)
arrays = np.array_split(columns, N)
slices = [slice(a[0], 1 + a[-1]) for a in arrays]
logger.info('Gap fill will be run across the full data column '
'shape {} in {} column chunks with approximately {} '
'sites per chunk'
logger.info('MLClouds gap fill will be run across the full data '
'column shape {} in {} column chunks with '
'approximately {} sites per chunk'
.format(len(columns), len(slices), col_chunk))

if max_workers == 1:
Expand Down

0 comments on commit a31e5c8

Please sign in to comment.