Skip to content

Commit

Permalink
Sperf & Cperf Benchmarks (#4621)
Browse files Browse the repository at this point in the history
  • Loading branch information
trexfeathers authored Mar 10, 2022
1 parent 6377594 commit c27f524
Show file tree
Hide file tree
Showing 27 changed files with 1,141 additions and 155 deletions.
23 changes: 20 additions & 3 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,20 @@ automated overnight run locally. See the session docstring for detail.

### Environment variables

* ``DATA_GEN_PYTHON`` - required - path to a Python executable that can be
* `OVERRIDE_TEST_DATA_REPOSITORY` - required - some benchmarks use
`iris-test-data` content, and your local `site.cfg` is not available for
benchmark scripts.
* `DATA_GEN_PYTHON` - required - path to a Python executable that can be
used to generate benchmark test objects/files; see
[Data generation](#data-generation). The Nox session sets this automatically,
but will defer to any value already set in the shell.
* ``BENCHMARK_DATA`` - optional - path to a directory for benchmark synthetic
* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
test data, which the benchmark scripts will create if it doesn't already
exist. Defaults to ``<root>/benchmarks/.data/`` if not set.
exist. Defaults to `<root>/benchmarks/.data/` if not set.
* `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks
decorated with `@on_demand_benchmark` are included in the ASV run. Usually
coupled with the ASV `--bench` argument to only run the benchmark(s) of
interest. Is set during the Nox `cperf` and `sperf` sessions.

## Writing benchmarks

Expand Down Expand Up @@ -65,6 +72,16 @@ be significantly larger (e.g. a 1000x1000 `Cube`). Performance differences
might only be seen for the larger value, or the smaller, or both, getting you
closer to the root cause.

### On-demand benchmarks

Some benchmarks provide useful insight but are inappropriate to be included in
a benchmark run by default, e.g. those with long run-times or requiring a local
file. These benchmarks should be decorated with `@on_demand_benchmark`
(see [benchmarks init](./benchmarks/__init__.py)), which
sets the benchmark to only be included in a run when the `ON_DEMAND_BENCHMARKS`
environment variable is set. Examples include the CPerf and SPerf benchmark
suites for the UK Met Office NG-VAT project.

## Benchmark environments

We have disabled ASV's standard environment management, instead using an
Expand Down
1 change: 1 addition & 0 deletions benchmarks/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"repo": "..",
"environment_type": "conda-delegated",
"show_commit_url": "http://github.com/scitools/iris/commit/",
"branches": ["upstream/main"],

"benchmark_dir": "./benchmarks",
"env_dir": ".asv/env",
Expand Down
61 changes: 59 additions & 2 deletions benchmarks/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""Common code for benchmarks."""
from functools import wraps
from os import environ
import resource

from .generate_data import BENCHMARK_DATA, run_function_elsewhere

ARTIFICIAL_DIM_SIZE = int(10e3) # For all artificial cubes, coords etc.


Expand Down Expand Up @@ -70,3 +70,60 @@ def __exit__(self, *_):
def addedmem_mb(self):
    """Return the growth in resident memory across the context, in Mb."""
    before, after = self.mb_before, self.mb_after
    return after - before

@staticmethod
def decorator(changed_params: list = None):
"""
Decorates this benchmark to track growth in resident memory during execution.
Intended for use on ASV ``track_`` benchmarks. Applies the
:class:`TrackAddedMemoryAllocation` context manager to the benchmark
code, sets the benchmark ``unit`` attribute to ``Mb``. Optionally
replaces the benchmark ``params`` attribute with ``changed_params`` -
useful to avoid testing very small memory volumes, where the results
are vulnerable to noise.
Parameters
----------
changed_params : list
Replace the benchmark's ``params`` attribute with this list.
"""
if changed_params:
# Must make a copy for re-use safety!
_changed_params = list(changed_params)
else:
_changed_params = None

def _inner_decorator(decorated_func):
@wraps(decorated_func)
def _inner_func(*args, **kwargs):
assert decorated_func.__name__[:6] == "track_"
# Run the decorated benchmark within the added memory context manager.
with TrackAddedMemoryAllocation() as mb:
decorated_func(*args, **kwargs)
return mb.addedmem_mb()

if _changed_params:
# Replace the params if replacement provided.
_inner_func.params = _changed_params
_inner_func.unit = "Mb"
return _inner_func

return _inner_decorator


def on_demand_benchmark(benchmark_object):
    """
    Decorator. Disables these benchmark(s) unless the ON_DEMAND_BENCHMARKS
    environment variable is set.

    For benchmarks that, for whatever reason, should not be run by default.
    E.g:

    * Require a local file
    * Used for scalability analysis instead of commit monitoring.

    Can be applied to benchmark classes/methods/functions.

    Returns
    -------
    The decorated object unchanged if ``ON_DEMAND_BENCHMARKS`` is set in the
    environment, otherwise ``None`` - so the name no longer refers to a
    benchmark object and the benchmark is excluded from the run.
    """
    if "ON_DEMAND_BENCHMARKS" in environ:
        return benchmark_object
    # Deliberately replace the benchmark with None when not on-demand.
    return None
97 changes: 97 additions & 0 deletions benchmarks/benchmarks/cperf/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.
CPerf = comparing performance working with data in UM versus LFRic formats.
Files available from the UK Met Office:
moo ls moose:/adhoc/projects/avd/asv/data_for_nightly_tests/
"""
import numpy as np

from iris import load_cube

# TODO: remove uses of PARSE_UGRID_ON_LOAD once UGRID parsing is core behaviour.
from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD

from ..generate_data import BENCHMARK_DATA
from ..generate_data.ugrid import make_cubesphere_testfile

# The data of the core test UM files has dtype=np.float32 shape=(1920, 2560)
_UM_DIMS_YX = (1920, 2560)
# The closest cubesphere size in terms of datapoints is sqrt(1920*2560 / 6)
# This gives ~= 905, i.e. "C905" - used below as the number of cells along
# each edge of a cubesphere panel.
_N_CUBESPHERE_UM_EQUIVALENT = int(np.sqrt(np.prod(_UM_DIMS_YX) / 6))


class SingleDiagnosticMixin:
    """For use in any benchmark classes that work on a single diagnostic file."""

    params = [
        ["LFRic", "UM", "UM_lbpack0", "UM_netcdf"],
        [False, True],
        [False, True],
    ]
    param_names = ["file type", "height dim (len 71)", "time dim (len 3)"]

    def setup(self, file_type, three_d, three_times):
        # Record the file to benchmark against - synthesised for LFRic,
        # pre-existing local files for the UM variants.
        if file_type == "LFRic":
            file_path = self._synthetic_lfric_file(three_d, three_times)
        else:
            file_path = self._local_um_file(file_type, three_d, three_times)
        self.file_path = file_path
        self.file_type = file_type

    @staticmethod
    def _synthetic_lfric_file(three_d, three_times):
        # Generate an appropriate synthetic LFRic file.
        # Use a cubesphere size ~equivalent to our UM test data.
        create_kwargs = dict(
            c_size=_N_CUBESPHERE_UM_EQUIVALENT,
            n_times=3 if three_times else 1,
        )
        if three_d:
            create_kwargs["n_levels"] = 71
        # Will re-use a file if already present.
        return make_cubesphere_testfile(**create_kwargs)

    @staticmethod
    def _local_um_file(file_type, three_d, three_times):
        # Locate the appropriate UM file.
        # pa/pb003 files (three time points) vs pa/pb000 files (one).
        numeric = "003" if three_times else "000"
        if three_d:
            # theta diagnostic, N1280 file w/ 71 levels (1920, 2560, 71)
            file_name = f"umglaa_pb{numeric}-theta"
        else:
            # surface_temp diagnostic, N1280 file (1920, 2560)
            file_name = f"umglaa_pa{numeric}-surfacetemp"

        file_suffices = {
            "UM": "",  # packed FF (WGDOS lbpack = 1)
            "UM_lbpack0": ".uncompressed",  # unpacked FF (lbpack = 0)
            "UM_netcdf": ".nc",  # UM file -> Iris -> NetCDF file
        }
        suffix = file_suffices[file_type]

        file_path = (BENCHMARK_DATA / file_name).with_suffix(suffix)
        if not file_path.exists():
            message = "\n".join(
                [
                    f"Expected local file not found: {file_path}",
                    "Available from the UK Met Office.",
                ]
            )
            raise FileNotFoundError(message)
        return file_path

    def load(self):
        # Load the single diagnostic, with UGRID parsing enabled.
        with PARSE_UGRID_ON_LOAD.context():
            return load_cube(str(self.file_path))
58 changes: 58 additions & 0 deletions benchmarks/benchmarks/cperf/equality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Equality benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.
"""
from . import SingleDiagnosticMixin
from .. import on_demand_benchmark


class EqualityMixin(SingleDiagnosticMixin):
    """
    Uses :class:`SingleDiagnosticMixin` as the realistic case will be comparing
    :class:`~iris.cube.Cube`\\ s that have been loaded from file.
    """

    # Cut down the parent parameters.
    params = [["LFRic", "UM"]]

    def setup(self, file_type, three_d=False, three_times=False):
        super().setup(file_type, three_d, three_times)
        # Two independent loads of the same file, for comparison benchmarks.
        self.cube, self.other_cube = self.load(), self.load()


@on_demand_benchmark
class CubeEquality(EqualityMixin):
    """
    Benchmark time and memory costs of comparing LFRic and UM
    :class:`~iris.cube.Cube`\\ s.
    """

    def _do_compare(self):
        # Discard the result - only the cost of evaluating ``==`` matters.
        _ = self.cube == self.other_cube

    def peakmem_eq(self, file_type):
        self._do_compare()

    def time_eq(self, file_type):
        self._do_compare()


@on_demand_benchmark
class MeshEquality(EqualityMixin):
    """Provides extra context for :class:`CubeEquality`."""

    # LFRic only - UM data has no mesh.
    params = [["LFRic"]]

    def _do_compare(self):
        # Discard the result - only the cost of evaluating ``==`` matters.
        _ = self.cube.mesh == self.other_cube.mesh

    def peakmem_eq(self, file_type):
        self._do_compare()

    def time_eq(self, file_type):
        self._do_compare()
57 changes: 57 additions & 0 deletions benchmarks/benchmarks/cperf/load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
File loading benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.
"""
from . import SingleDiagnosticMixin
from .. import on_demand_benchmark


@on_demand_benchmark
class SingleDiagnosticLoad(SingleDiagnosticMixin):
    def time_load(self, _, __, ___):
        """
        The 'real world comparison'

        * UM coords are always realised (DimCoords).
        * LFRic coords are not realised by default (MeshCoords).
        """
        cube = self.load()
        assert cube.has_lazy_data()
        # UM files load lon/lat as DimCoords, which are always realised.
        expect_lazy = self.file_type == "LFRic"
        for name in ("longitude", "latitude"):
            coord = cube.coord(name)
            assert coord.has_lazy_points() == expect_lazy
            assert coord.has_lazy_bounds() == expect_lazy

    def time_load_w_realised_coords(self, _, __, ___):
        """A valuable extra comparison where both UM and LFRic coords are realised."""
        cube = self.load()
        for name in ("longitude", "latitude"):
            coord = cube.coord(name)
            # Don't touch actual points/bounds objects - permanent
            # realisation plays badly with ASV's re-run strategy.
            if coord.has_lazy_points():
                coord.core_points().compute()
            if coord.has_lazy_bounds():
                coord.core_bounds().compute()


@on_demand_benchmark
class SingleDiagnosticRealise(SingleDiagnosticMixin):
    # The larger files take a long time to realise.
    timeout = 600.0

    def setup(self, file_type, three_d, three_times):
        super().setup(file_type, three_d, three_times)
        # Load once here so only realisation is timed below.
        self.loaded_cube = self.load()

    def time_realise(self, _, __, ___):
        # Compute from core_data() - don't touch loaded_cube.data, as
        # permanent realisation plays badly with ASV's re-run strategy.
        assert self.loaded_cube.has_lazy_data()
        self.loaded_cube.core_data().compute()
47 changes: 47 additions & 0 deletions benchmarks/benchmarks/cperf/save.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
File saving benchmarks for the CPerf scheme of the UK Met Office's NG-VAT project.
"""

from iris import save

from . import _N_CUBESPHERE_UM_EQUIVALENT, _UM_DIMS_YX
from .. import TrackAddedMemoryAllocation, on_demand_benchmark
from ..generate_data.ugrid import (
make_cube_like_2d_cubesphere,
make_cube_like_umfield,
)


@on_demand_benchmark
class NetcdfSave:
    """
    Benchmark time and memory costs of saving ~large-ish data cubes to netcdf.
    Parametrised by file type.
    """

    params = ["LFRic", "UM"]
    param_names = ["data type"]

    def setup(self, data_type):
        # Build either a mesh-backed cubesphere cube or a UM-field-like cube.
        if data_type == "LFRic":
            cube = make_cube_like_2d_cubesphere(
                n_cube=_N_CUBESPHERE_UM_EQUIVALENT, with_mesh=True
            )
        else:
            cube = make_cube_like_umfield(_UM_DIMS_YX)
        self.cube = cube

    def _save_data(self, cube):
        # NOTE: overwrites the same scratch file on every invocation.
        save(cube, "tmp.nc")

    def time_save_data_netcdf(self, data_type):
        self._save_data(self.cube)

    @TrackAddedMemoryAllocation.decorator()
    def track_addedmem_save_data_netcdf(self, data_type):
        self._save_data(self.cube)
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

from iris.experimental import ugrid

from .. import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
from ..generate_data.stock import sample_mesh
from ... import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
from ...generate_data.stock import sample_mesh


class UGridCommon:
Expand Down
Loading

0 comments on commit c27f524

Please sign in to comment.