-
Notifications
You must be signed in to change notification settings - Fork 284
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Synthetic FF PP NetCDF and loading benchmarks.
- Loading branch information
1 parent
d1d1e00
commit 15bd351
Showing
6 changed files
with
543 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# Copyright Iris contributors | ||
# | ||
# This file is part of Iris and is released under the LGPL license. | ||
# See COPYING and COPYING.LESSER in the root of the repository for full | ||
# licensing details. | ||
""" | ||
Scripts for generating supporting data for benchmarking. | ||
Data generated using Iris should use :func:`run_function_elsewhere`, which | ||
means that data is generated using a fixed version of Iris and a fixed | ||
environment, rather than those that get changed when the benchmarking run | ||
checks out a new commit. | ||
Downstream use of data generated 'elsewhere' requires saving; usually in a | ||
NetCDF file. Could also use pickling but there is a potential risk if the | ||
benchmark sequence runs over two different Python versions. | ||
""" | ||
from inspect import getsource | ||
from os import environ | ||
from pathlib import Path | ||
from subprocess import CalledProcessError, check_output, run | ||
from textwrap import dedent | ||
|
||
#: Python executable used by :func:`run_function_elsewhere`, set via env
#: variable of same name. Must be path of Python within an environment that
#: includes Iris (including dependencies and test modules) and Mule.
try:
    DATA_GEN_PYTHON = environ["DATA_GEN_PYTHON"]
    # Smoke-test that the path is a runnable Python interpreter by executing
    # a trivial statement.
    _ = check_output([DATA_GEN_PYTHON, "-c", "a = True"])
except KeyError as err:
    error = "Env variable DATA_GEN_PYTHON not defined."
    # Chain the original exception so the traceback shows the root cause.
    raise KeyError(error) from err
except (CalledProcessError, FileNotFoundError, PermissionError) as err:
    error = (
        "Env variable DATA_GEN_PYTHON not a runnable python executable path."
    )
    raise ValueError(error) from err
|
||
# Default on-disk location for benchmark data files; this is what CI uses.
default_data_dir = (Path(__file__).parents[2] / ".data").resolve()
# An environment variable may redirect benchmarks to an alternative location.
BENCHMARK_DATA = Path(environ.get("BENCHMARK_DATA", default_data_dir))
if BENCHMARK_DATA != default_data_dir:
    # A user-supplied location must already exist.
    if not BENCHMARK_DATA.is_dir():
        message = f"Not a directory: {BENCHMARK_DATA} ."
        raise ValueError(message)
else:
    # The default location is created on demand.
    BENCHMARK_DATA.mkdir(exist_ok=True)


# Manual flag to allow the rebuilding of synthetic data.
# Set to False to force a benchmark run to re-make all the data files.
REUSE_DATA = True
|
||
|
||
def run_function_elsewhere(func_to_run, *args, **kwargs):
    """
    Run a given function using the :const:`DATA_GEN_PYTHON` executable.

    This structure allows the function to be written natively.

    Parameters
    ----------
    func_to_run : FunctionType
        The function object to be run.
        NOTE: the function must be completely self-contained, i.e. perform all
        its own imports (within the target :const:`DATA_GEN_PYTHON`
        environment).
    *args : tuple, optional
        Function call arguments. Must all be expressible as simple literals,
        i.e. the ``repr`` must be a valid literal expression.
    **kwargs: dict, optional
        Function call keyword arguments. All values must be expressible as
        simple literals (see ``*args``).

    Returns
    -------
    bytes
        The ``stdout`` from the run. (``subprocess.run`` is called without
        ``text=True``, so the captured output is raw bytes, not ``str``.)

    Raises
    ------
    subprocess.CalledProcessError
        If the external process exits with a non-zero status (``check=True``).
    """
    # Recover the function's source text, and strip any @staticmethod
    # decorator that would stop it running as a stand-alone script.
    func_string = dedent(getsource(func_to_run))
    func_string = func_string.replace("@staticmethod\n", "")
    # Express all call arguments as literal source text.
    func_call_term_strings = [repr(arg) for arg in args]
    func_call_term_strings += [
        f"{name}={val!r}" for name, val in kwargs.items()
    ]
    func_call_string = (
        f"{func_to_run.__name__}(" + ",".join(func_call_term_strings) + ")"
    )
    # Compose a self-contained script: the definition followed by one call.
    python_string = "\n".join([func_string, func_call_string])
    result = run(
        [DATA_GEN_PYTHON, "-c", python_string], capture_output=True, check=True
    )
    return result.stdout
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
# Copyright Iris contributors | ||
# | ||
# This file is part of Iris and is released under the LGPL license. | ||
# See COPYING and COPYING.LESSER in the root of the repository for full | ||
# licensing details. | ||
""" | ||
Generate FF, PP and NetCDF files based on a minimal synthetic FF file. | ||
NOTE: uses the Mule package, so depends on an environment with Mule installed. | ||
""" | ||
|
||
|
||
def _create_um_files(
    len_x: int, len_y: int, len_z: int, len_t: int, compress, save_paths: dict
) -> None:
    """
    Generate an FF object of given shape and compression, save to FF/PP/NetCDF.

    This is run externally
    (:func:`benchmarks.generate_data.run_function_elsewhere`), so all imports
    are self-contained and input parameters are simple types.

    Parameters
    ----------
    len_x, len_y : int
        Horizontal grid dimensions (columns, rows).
    len_z : int
        Number of vertical levels.
    len_t : int
        Number of time steps.
    compress
        Truthy enables packing: ``int(compress)`` becomes the field ``lbpack``
        value and ``compress`` is passed to NetCDF saving as ``zlib``.
    save_paths : dict
        Maps any of the keys "FF", "PP", "NetCDF" to an output file path;
        only the formats present in the dict are written.
    """
    from copy import deepcopy
    from datetime import datetime
    from tempfile import NamedTemporaryFile

    from mo_pack import compress_wgdos as mo_pack_compress
    from mule import ArrayDataProvider, Field3, FieldsFile
    from mule.pp import fields_to_pp_file
    import numpy as np

    from iris import load_cube
    from iris import save as save_cube

    def packing_patch(*compress_args, **compress_kwargs) -> bytes:
        """
        Force conversion from returned :class:`memoryview` to :class:`bytes`.

        Downstream uses of :func:`mo_pack.compress_wgdos` were written
        for the ``Python2`` behaviour, where the returned buffer had a
        different ``__len__`` value to the current :class:`memoryview`.
        Unable to fix directly in Mule, so monkey patching for now.
        """
        return mo_pack_compress(*compress_args, **compress_kwargs).tobytes()

    import mo_pack

    # Monkey patch applied for the lifetime of this (external) process.
    mo_pack.compress_wgdos = packing_patch

    ########

    # Minimal FieldsFile template; dataset_type 3 / grid_staggering 3
    # correspond to the file style being simulated here.
    template = {
        "fixed_length_header": {"dataset_type": 3, "grid_staggering": 3},
        "integer_constants": {
            "num_p_levels": len_z,
            "num_cols": len_x,
            "num_rows": len_y,
        },
        "real_constants": {},
        "level_dependent_constants": {"dims": (len_z + 1, None)},
    }
    new_ff = FieldsFile.from_template(deepcopy(template))

    # A single data payload shared (via one provider) by every field.
    # NOTE(review): shape is (len_x, len_y) while lbnpt/lbrow below declare
    # (cols=len_x, rows=len_y) - confirm intended for non-square grids.
    data_array = np.arange(len_x * len_y).reshape(len_x, len_y)
    array_provider = ArrayDataProvider(data_array)

    def add_field(level_: int, time_step_: int) -> None:
        """
        Add a minimal field to the new :class:`~mule.FieldsFile`.

        Includes the minimum information to allow Mule saving and Iris
        loading, as well as incrementation for vertical levels and time
        steps to allow generation of z and t dimensions.
        """
        new_field = Field3.empty()
        # To correspond to the header-release 3 class used.
        new_field.lbrel = 3
        # Mule uses the first element of the lookup to test for
        # unpopulated fields (and skips them), so the first element should
        # be set to something. The year will do.
        new_field.raw[1] = datetime.now().year

        # Horizontal.
        new_field.lbcode = 1
        new_field.lbnpt = len_x
        new_field.lbrow = len_y
        new_field.bdx = new_ff.real_constants.col_spacing
        new_field.bdy = new_ff.real_constants.row_spacing
        # Grid origin offset by half a cell from the file's start lon/lat.
        new_field.bzx = new_ff.real_constants.start_lon - 0.5 * new_field.bdx
        new_field.bzy = new_ff.real_constants.start_lat - 0.5 * new_field.bdy

        # Hemisphere.
        new_field.lbhem = 32
        # Processing.
        new_field.lbproc = 0

        # Vertical.
        # Hybrid height values by simulating sequences similar to those in a
        # theta file.
        new_field.lbvc = 65
        if level_ == 0:
            # 9999 marks the surface level.
            new_field.lblev = 9999
        else:
            new_field.lblev = level_

        level_1 = level_ + 1
        six_rec = 20 / 3
        three_rec = six_rec / 2

        # Quadratic progressions approximating hybrid-height level values.
        new_field.blev = level_1 ** 2 * six_rec - six_rec
        new_field.brsvd1 = (
            level_1 ** 2 * six_rec + (six_rec * level_1) - three_rec
        )

        # Simulated sigma-like sequences, shifted so the lowest level(s)
        # hold the value 1.
        brsvd2_simulated = np.linspace(0.995, 0, len_z)
        shift = min(len_z, 2)
        bhrlev_simulated = np.concatenate(
            [np.ones(shift), brsvd2_simulated[:-shift]]
        )
        new_field.brsvd2 = brsvd2_simulated[level_]
        new_field.bhrlev = bhrlev_simulated[level_]

        # Time.
        new_field.lbtim = 11

        # Encode the time step in the (otherwise-zeroed) start/end dates so
        # Iris can build a t dimension.
        new_field.lbyr = time_step_
        for attr_name in ["lbmon", "lbdat", "lbhr", "lbmin", "lbsec"]:
            setattr(new_field, attr_name, 0)

        new_field.lbyrd = time_step_ + 1
        for attr_name in ["lbmond", "lbdatd", "lbhrd", "lbmind", "lbsecd"]:
            setattr(new_field, attr_name, 0)

        # Data and packing.
        new_field.lbuser1 = 1
        new_field.lbpack = int(compress)
        new_field.bacc = 0
        new_field.bmdi = -1
        new_field.lbext = 0
        new_field.set_data_provider(array_provider)

        new_ff.fields.append(new_field)

    # One field per (time step, level) pair; time steps are 1-based.
    for time_step in range(len_t):
        for level in range(len_z):
            add_field(level, time_step + 1)

    ff_path = save_paths.get("FF", None)
    pp_path = save_paths.get("PP", None)
    nc_path = save_paths.get("NetCDF", None)

    if ff_path:
        new_ff.to_file(ff_path)
    if pp_path:
        fields_to_pp_file(str(pp_path), new_ff.fields)
    if nc_path:
        temp_ff_path = None
        # Need an Iris Cube from the FF content.
        if ff_path:
            # Use the existing file.
            ff_cube = load_cube(ff_path)
        else:
            # Make a temporary file.
            temp_ff_path = NamedTemporaryFile()
            new_ff.to_file(temp_ff_path.name)
            ff_cube = load_cube(temp_ff_path.name)

        save_cube(ff_cube, nc_path, zlib=compress)
        if temp_ff_path:
            temp_ff_path.close()
|
||
|
||
FILE_EXTENSIONS = {"FF": "", "PP": ".pp", "NetCDF": ".nc"} | ||
|
||
|
||
def create_um_files(
    len_x: int,
    len_y: int,
    len_z: int,
    len_t: int,
    compress: bool,
    file_types: list,
) -> dict:
    """
    Generate FF-based FF / PP / NetCDF files with specified shape and compression.

    All files representing a given shape are saved in a dedicated directory. A
    dictionary of the saved paths is returned.

    If the required files exist, they are re-used, unless
    :const:`benchmarks.REUSE_DATA` is ``False``.

    Parameters
    ----------
    len_x, len_y, len_z, len_t : int
        Grid dimensions passed through to the data generator.
    compress : bool
        Whether the generated files are packed/compressed; also forms part of
        the saved file name, so compressed and uncompressed variants coexist.
    file_types : list
        Any of the keys of :const:`FILE_EXTENSIONS` ("FF", "PP", "NetCDF").

    Returns
    -------
    dict
        Maps each requested file type to the (string) path of its file.
    """
    # Self contained imports to avoid linting confusion with _create_um_files().
    from . import BENCHMARK_DATA, REUSE_DATA, run_function_elsewhere

    save_name_sections = ["UM", len_x, len_y, len_z, len_t]
    save_name = "_".join(str(section) for section in save_name_sections)
    save_dir = BENCHMARK_DATA / save_name
    # exist_ok=True avoids the check-then-create race of a separate
    # is_dir() test followed by mkdir().
    save_dir.mkdir(parents=True, exist_ok=True)

    save_paths = {}
    files_exist = True
    for file_type in file_types:
        file_ext = FILE_EXTENSIONS[file_type]
        save_path = (save_dir / f"{compress}").with_suffix(file_ext)
        files_exist = files_exist and save_path.is_file()
        save_paths[file_type] = str(save_path)

    # Only regenerate when reuse is disabled or any requested file is missing.
    if not REUSE_DATA or not files_exist:
        _ = run_function_elsewhere(
            _create_um_files, len_x, len_y, len_z, len_t, compress, save_paths
        )

    return save_paths
Oops, something went wrong.