From 089115d9106f21ba58bdac37b52f39dc279c1ee1 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 23 Nov 2024 04:38:11 +0000 Subject: [PATCH 01/15] DataOffload: Move FieldPointerMap outside of transformer class --- .../transformations/data_offload/field_api.py | 133 +++++++++--------- 1 file changed, 68 insertions(+), 65 deletions(-) diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_api.py index 00d9de4f4..33b5f57d0 100644 --- a/loki/transformations/data_offload/field_api.py +++ b/loki/transformations/data_offload/field_api.py @@ -25,7 +25,7 @@ ) -__all__ = ['FieldOffloadTransformation'] +__all__ = ['FieldOffloadTransformation', 'FieldPointerMap'] def find_target_calls(region, targets): @@ -72,69 +72,6 @@ class FieldOffloadTransformation(Transformation): calls (defaults to ``'IBL'``). """ - class FieldPointerMap: - """ - Helper class to :any:`FieldOffloadTransformation` that is used to store arrays passed to - target kernel calls and the corresponding device pointers added by the transformation. - The pointer/array variable pairs are exposed through the class properties, based on - the intent of the kernel argument. - """ - def __init__(self, devptrs, inargs, inoutargs, outargs): - self.inargs = inargs - self.inoutargs = inoutargs - self.outargs = outargs - self.devptrs = devptrs - - - @property - def in_pairs(self): - """ - Iterator that yields array/pointer pairs for kernel arguments of intent(in). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - for i, inarg in enumerate(self.inargs): - yield inarg, self.devptrs[i] - - @property - def inout_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(inout). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. 
- """ - start = len(self.inargs) - for i, inoutarg in enumerate(self.inoutargs): - yield inoutarg, self.devptrs[i+start] - - @property - def out_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(out) - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - - start = len(self.inargs)+len(self.inoutargs) - for i, outarg in enumerate(self.outargs): - yield outarg, self.devptrs[i+start] - - def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=None): self.deviceptr_prefix = 'loki_devptr_' if devptr_prefix is None else devptr_prefix field_group_types = [''] if field_group_types is None else field_group_types @@ -157,7 +94,7 @@ def process_driver(self, driver, targets): kernel_calls = find_target_calls(region, targets) offload_variables = self.find_offload_variables(driver, kernel_calls) device_ptrs = self._declare_device_ptrs(driver, offload_variables) - offload_map = self.FieldPointerMap(device_ptrs, *offload_variables) + offload_map = FieldPointerMap(device_ptrs, *offload_variables) self._add_field_offload_calls(driver, region, offload_map) self._replace_kernel_args(driver, kernel_calls, offload_map) @@ -251,3 +188,69 @@ def _replace_kernel_args(self, driver, kernel_calls, offload_map): arg_transformer = SubstituteExpressions(change_map, inplace=True) for call in kernel_calls: arg_transformer.visit(call) + + +class FieldPointerMap: + """ + Helper class to map FIELD API pointers to intents and access descriptors. + + This utility is used to store arrays passed to target kernel calls + and the corresponding device pointers added by the transformation. + + The pointer/array variable pairs are exposed through the class + properties, based on the intent of the kernel argument. 
+ """ + def __init__(self, devptrs, inargs, inoutargs, outargs): + self.inargs = inargs + self.inoutargs = inoutargs + self.outargs = outargs + self.devptrs = devptrs + + + @property + def in_pairs(self): + """ + Iterator that yields array/pointer pairs for kernel arguments of intent(in). + + Yields + ______ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ + for i, inarg in enumerate(self.inargs): + yield inarg, self.devptrs[i] + + @property + def inout_pairs(self): + """ + Iterator that yields array/pointer pairs for arguments with intent(inout). + + Yields + ______ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ + start = len(self.inargs) + for i, inoutarg in enumerate(self.inoutargs): + yield inoutarg, self.devptrs[i+start] + + @property + def out_pairs(self): + """ + Iterator that yields array/pointer pairs for arguments with intent(out) + + Yields + ______ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ + + start = len(self.inargs)+len(self.inoutargs) + for i, outarg in enumerate(self.outargs): + yield outarg, self.devptrs[i+start] From c7c39406bd957bdea1f2e2eb7f39a53c09a4c401 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 23 Nov 2024 05:33:06 +0000 Subject: [PATCH 02/15] DataOffload: Filter duplicates with dict to retain ordering Sets are unordered, so using them to filter creates an arbitrary ordering, which in turn yields an unpredictable order of declarations. 
--- loki/transformations/data_offload/field_api.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_api.py index 33b5f57d0..e344faace 100644 --- a/loki/transformations/data_offload/field_api.py +++ b/loki/transformations/data_offload/field_api.py @@ -133,11 +133,12 @@ def find_offload_variables(self, driver, calls): inargs = tuple(v for v in inargs if v not in inoutargs) outargs = tuple(v for v in outargs if v not in inoutargs) - inargs = tuple(set(inargs)) - inoutargs = tuple(set(inoutargs)) - outargs = tuple(set(outargs)) - return inargs, inoutargs, outargs + # Filter out duplicates and return as tuple + inargs = tuple(dict.fromkeys(inargs)) + inoutargs = tuple(dict.fromkeys(inoutargs)) + outargs = tuple(dict.fromkeys(outargs)) + return inargs, inoutargs, outargs def _declare_device_ptrs(self, driver, offload_variables): device_ptrs = tuple(self._devptr_from_array(driver, a) for a in chain(*offload_variables)) From 377bba7a8e5f2027422621e85583c92b5304ec33 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 23 Nov 2024 06:08:25 +0000 Subject: [PATCH 03/15] DataOffload: Expose the core functionalities of FieldOffloadTrafo This allows us to re-use the key pieces without being hooked into the `!$loki data` regions semantics. --- .../transformations/data_offload/field_api.py | 236 +++++++++--------- 1 file changed, 123 insertions(+), 113 deletions(-) diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_api.py index e344faace..b61e99fcf 100644 --- a/loki/transformations/data_offload/field_api.py +++ b/loki/transformations/data_offload/field_api.py @@ -25,23 +25,11 @@ ) -__all__ = ['FieldOffloadTransformation', 'FieldPointerMap'] - - -def find_target_calls(region, targets): - """ - Returns a list of all calls to targets inside the region. 
- - Parameters - ---------- - :region: :any:`PragmaRegion` - :targets: collection of :any:`Subroutine` - Iterable object of subroutines or functions called - :returns: list of :any:`CallStatement` - """ - calls = FindNodes(CallStatement).visit(region) - calls = [c for c in calls if str(c.name).lower() in targets] - return calls +__all__ = [ + 'FieldOffloadTransformation', 'FieldPointerMap', + 'find_target_calls', 'find_offload_variables', + 'add_field_offload_calls', 'replace_kernel_args' +] class FieldOffloadTransformation(Transformation): @@ -92,103 +80,11 @@ def process_driver(self, driver, targets): if not DataOffloadTransformation._is_active_loki_data_region(region, targets): continue kernel_calls = find_target_calls(region, targets) - offload_variables = self.find_offload_variables(driver, kernel_calls) - device_ptrs = self._declare_device_ptrs(driver, offload_variables) + offload_variables = find_offload_variables(driver, kernel_calls, self.field_group_types) + device_ptrs = declare_device_ptrs(driver, offload_variables, self.deviceptr_prefix) offload_map = FieldPointerMap(device_ptrs, *offload_variables) - self._add_field_offload_calls(driver, region, offload_map) - self._replace_kernel_args(driver, kernel_calls, offload_map) - - def find_offload_variables(self, driver, calls): - inargs = () - inoutargs = () - outargs = () - - for call in calls: - if call.routine is BasicType.DEFERRED: - error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + - f'in {str(call.name).lower()}') - raise RuntimeError - for param, arg in call.arg_iter(): - if not isinstance(param, Array): - continue - try: - parent = arg.parent - if parent.type.dtype.name.lower() not in self.field_group_types: - warning(f'[Loki] Data offload: The parent object {parent.name} of type ' + - f'{parent.type.dtype} is not in the list of field wrapper types') - continue - except AttributeError: - warning(f'[Loki] Data offload: Raw array object {arg.name} encountered in' - + f' 
{driver.name} that is not wrapped by a Field API object') - continue - - if param.type.intent.lower() == 'in': - inargs += (arg, ) - if param.type.intent.lower() == 'inout': - inoutargs += (arg, ) - if param.type.intent.lower() == 'out': - outargs += (arg, ) - - inoutargs += tuple(v for v in inargs if v in outargs) - inargs = tuple(v for v in inargs if v not in inoutargs) - outargs = tuple(v for v in outargs if v not in inoutargs) - - # Filter out duplicates and return as tuple - inargs = tuple(dict.fromkeys(inargs)) - inoutargs = tuple(dict.fromkeys(inoutargs)) - outargs = tuple(dict.fromkeys(outargs)) - - return inargs, inoutargs, outargs - - def _declare_device_ptrs(self, driver, offload_variables): - device_ptrs = tuple(self._devptr_from_array(driver, a) for a in chain(*offload_variables)) - driver.variables += device_ptrs - return device_ptrs - - def _devptr_from_array(self, driver, a: sym.Array): - """ - Returns a contiguous pointer :any:`Variable` with types matching the array a - """ - shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) - devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) - base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) - devptr_name = self.deviceptr_prefix + base_name - if devptr_name in driver.variable_map: - warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + - f'variable named {devptr_name}') - devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) - return devptr - - def _add_field_offload_calls(self, driver, region, offload_map): - host_to_device = tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) - host_to_device += tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.inout_pairs) - host_to_device += 
tuple(field_get_device_data(self._get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) - device_to_host = tuple(field_sync_host(self._get_field_ptr_from_view(inarg), driver) - for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) - update_map = {region: host_to_device + (region,) + device_to_host} - Transformer(update_map, inplace=True).visit(driver.body) - - def _get_field_ptr_from_view(self, field_view): - type_chain = field_view.name.split('%') - field_type_name = 'F_' + type_chain[-1] - return field_view.parent.get_derived_type_member(field_type_name) - - def _replace_kernel_args(self, driver, kernel_calls, offload_map): - change_map = {} - offload_idx_expr = driver.variable_map[self.offload_index] - for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): - if len(arg.dimensions) != 0: - dims = arg.dimensions + (offload_idx_expr,) - else: - dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) - change_map[arg] = devptr.clone(dimensions=dims) - - arg_transformer = SubstituteExpressions(change_map, inplace=True) - for call in kernel_calls: - arg_transformer.visit(call) + add_field_offload_calls(driver, region, offload_map) + replace_kernel_args(driver, kernel_calls, offload_map, self.offload_index) class FieldPointerMap: @@ -255,3 +151,117 @@ def out_pairs(self): start = len(self.inargs)+len(self.inoutargs) for i, outarg in enumerate(self.outargs): yield outarg, self.devptrs[i+start] + + +def find_target_calls(region, targets): + """ + Returns a list of all calls to targets inside the region. 
+ + Parameters + ---------- + :region: :any:`PragmaRegion` + :targets: collection of :any:`Subroutine` + Iterable object of subroutines or functions called + :returns: list of :any:`CallStatement` + """ + calls = FindNodes(CallStatement).visit(region) + calls = [c for c in calls if str(c.name).lower() in targets] + return calls + + +def find_offload_variables(driver, calls, field_group_types): + inargs = () + inoutargs = () + outargs = () + + for call in calls: + if call.routine is BasicType.DEFERRED: + error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + + f'in {str(call.name).lower()}') + raise RuntimeError + for param, arg in call.arg_iter(): + if not isinstance(param, Array): + continue + try: + parent = arg.parent + if parent.type.dtype.name.lower() not in field_group_types: + warning(f'[Loki] Data offload: The parent object {parent.name} of type ' + + f'{parent.type.dtype} is not in the list of field wrapper types') + continue + except AttributeError: + warning(f'[Loki] Data offload: Raw array object {arg.name} encountered in' + + f' {driver.name} that is not wrapped by a Field API object') + continue + + if param.type.intent.lower() == 'in': + inargs += (arg, ) + if param.type.intent.lower() == 'inout': + inoutargs += (arg, ) + if param.type.intent.lower() == 'out': + outargs += (arg, ) + + inoutargs += tuple(v for v in inargs if v in outargs) + inargs = tuple(v for v in inargs if v not in inoutargs) + outargs = tuple(v for v in outargs if v not in inoutargs) + + # Filter out duplicates and return as tuple + inargs = tuple(dict.fromkeys(inargs)) + inoutargs = tuple(dict.fromkeys(inoutargs)) + outargs = tuple(dict.fromkeys(outargs)) + + return inargs, inoutargs, outargs + + +def declare_device_ptrs(driver, offload_variables, deviceptr_prefix='loki_devptr_'): + + def _devptr_from_array(driver, a: sym.Array): + """ + Returns a contiguous pointer :any:`Variable` with types matching the array a + """ + shape = (sym.RangeIndex((None, 
None)),) * (len(a.shape)+1) + devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) + base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) + devptr_name = deviceptr_prefix + base_name + if devptr_name in driver.variable_map: + warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + + f'variable named {devptr_name}') + devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) + return devptr + + device_ptrs = tuple(_devptr_from_array(driver, a) for a in chain(*offload_variables)) + driver.variables += device_ptrs + return device_ptrs + + +def add_field_offload_calls(driver, region, offload_map): + + def _get_field_ptr_from_view(field_view): + type_chain = field_view.name.split('%') + field_type_name = 'F_' + type_chain[-1] + return field_view.parent.get_derived_type_member(field_type_name) + + host_to_device = tuple(field_get_device_data(_get_field_ptr_from_view(inarg), devptr, + FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) + host_to_device += tuple(field_get_device_data(_get_field_ptr_from_view(inarg), devptr, + FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.inout_pairs) + host_to_device += tuple(field_get_device_data(_get_field_ptr_from_view(inarg), devptr, + FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) + device_to_host = tuple(field_sync_host(_get_field_ptr_from_view(inarg), driver) + for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) + update_map = {region: host_to_device + (region,) + device_to_host} + Transformer(update_map, inplace=True).visit(driver.body) + + +def replace_kernel_args(driver, kernel_calls, offload_map, offload_index): + change_map = {} + offload_idx_expr = driver.variable_map[offload_index] + for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): + if len(arg.dimensions) != 0: + dims = 
arg.dimensions + (offload_idx_expr,) + else: + dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) + change_map[arg] = devptr.clone(dimensions=dims) + + arg_transformer = SubstituteExpressions(change_map, inplace=True) + for call in kernel_calls: + arg_transformer.visit(call) From 36e5221546b76ab08bf38c17ab327c8534d93db6 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Tue, 26 Nov 2024 19:08:09 +0000 Subject: [PATCH 04/15] DataOffload: Use single `state_module` fixture in field api tests --- .../data_offload/tests/test_field_api.py | 119 ++++++++---------- 1 file changed, 51 insertions(+), 68 deletions(-) diff --git a/loki/transformations/data_offload/tests/test_field_api.py b/loki/transformations/data_offload/tests/test_field_api.py index aaddbc543..b0ea66fc0 100644 --- a/loki/transformations/data_offload/tests/test_field_api.py +++ b/loki/transformations/data_offload/tests/test_field_api.py @@ -7,12 +7,12 @@ import pytest -from loki import Sourcefile +from loki import Sourcefile, Module +import loki.expression.symbols as sym from loki.frontend import available_frontends -from loki.logging import log_levels from loki.ir import FindNodes, Pragma, CallStatement -import loki.expression.symbols as sym -from loki.module import Module +from loki.logging import log_levels + from loki.transformations import FieldOffloadTransformation @@ -25,6 +25,7 @@ def fixture_parkind_mod(tmp_path, frontend): """ return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + @pytest.fixture(name="field_module") def fixture_field_module(tmp_path, frontend): fcode = """ @@ -54,13 +55,13 @@ def fixture_field_module(tmp_path, frontend): end subroutine end module """ - return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # 
pylint: disable=unused-argument +@pytest.fixture(name="state_module") +def fixture_state_module(tmp_path, parkind_mod, field_module, frontend): # pylint: disable=unused-argument fcode = """ - module driver_mod + module state_mod use parkind1, only: jprb use field_module, only: field_2rb, field_3rb implicit none @@ -78,6 +79,21 @@ def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # pylint class(state_type), intent(in) :: self integer, intent(in) :: idx end subroutine + end module state_mod +""" + return Module.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload(frontend, state_module, tmp_path): + fcode = """ + module driver_mod + use state_mod, only: state_type + use parkind1, only: jprb + use field_module, only: field_2rb, field_3rb + implicit none + + contains subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev @@ -109,7 +125,9 @@ def test_field_offload(frontend, parkind_mod, field_module, tmp_path): # pylint end subroutine driver_routine end module driver_mod """ - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, @@ -215,27 +233,15 @@ def test_field_offload_slices(frontend, parkind_mod, field_module, tmp_path): # @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_multiple_calls(frontend, state_module, tmp_path): fcode = """ module driver_mod use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb + use state_mod, only: state_type implicit none - type 
state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -270,7 +276,9 @@ def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_p end module driver_mod """ - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, @@ -306,7 +314,7 @@ def test_field_offload_multiple_calls(frontend, parkind_mod, field_module, tmp_p @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_no_targets(frontend, state_module, tmp_path): fother = """ module another_module implicit none @@ -320,28 +328,17 @@ def test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path) end subroutine end module """ + fcode = """ module driver_mod use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb + use state_mod, only: state_type use another_module, only: another_kernel implicit none - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end 
subroutine - subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -374,7 +371,9 @@ def test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path) """ Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module,xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, @@ -402,7 +401,7 @@ def test_field_offload_no_targets(frontend, parkind_mod, field_module, tmp_path) @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_unknown_kernel(caplog, frontend, state_module, tmp_path): fother = """ module another_module implicit none @@ -416,26 +415,16 @@ def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_modul end subroutine end module """ + fcode = """ module driver_mod use parkind1, only: jprb + use state_mod, only: state_type use another_module, only: another_kernel implicit none - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - subroutine driver_routine(nlon, nlev, state) integer, intent(in) :: nlon, nlev type(state_type), intent(inout) :: state @@ -453,7 +442,9 @@ def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_modul """ Sourcefile.from_source(fother, frontend=frontend, 
xmods=[tmp_path]) - driver_mod = Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' @@ -470,7 +461,7 @@ def test_field_offload_unknown_kernel(caplog, frontend, parkind_mod, field_modul @pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp_path): # pylint: disable=unused-argument +def test_field_offload_warnings(caplog, frontend, state_module, tmp_path): fother_state = """ module state_type_mod implicit none @@ -488,6 +479,7 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp end subroutine end module """ + fother_mod= """ module another_module implicit none @@ -501,29 +493,18 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp end subroutine end module """ + fcode = """ module driver_mod use state_type_mod, only: state_type2 use parkind1, only: jprb - use field_module, only: field_2rb, field_3rb + use state_mod, only: state_type use another_module, only: another_kernel implicit none - type state_type - real(kind=jprb), dimension(10,10), pointer :: a, b, c - class(field_3rb), pointer :: f_a, f_b, f_c - contains - procedure :: update_view => state_update_view - end type state_type - contains - subroutine state_update_view(self, idx) - class(state_type), intent(in) :: self - integer, intent(in) :: idx - end subroutine - subroutine kernel_routine(nlon, nlev, a, b, c) integer, intent(in) :: nlon, nlev real(kind=jprb), intent(in) :: a(nlon,nlev) @@ -560,7 +541,9 @@ def test_field_offload_warnings(caplog, frontend, parkind_mod, field_module, tmp """ Sourcefile.from_source(fother_state, frontend=frontend, xmods=[tmp_path]) Sourcefile.from_source(fother_mod, frontend=frontend, xmods=[tmp_path]) - driver_mod = 
Sourcefile.from_source(fcode, frontend=frontend, xmods=[tmp_path])['driver_mod'] + driver_mod = Sourcefile.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + )['driver_mod'] driver = driver_mod['driver_routine'] deviceptr_prefix = 'loki_devptr_prefix_' From 6f4f50c342854309773f1a90e85b5f475a206736 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 28 Nov 2024 05:01:55 +0000 Subject: [PATCH 05/15] DataOffload: Add test for aliasing argument pointers in kernel calls --- .../data_offload/tests/test_field_api.py | 75 ++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/loki/transformations/data_offload/tests/test_field_api.py b/loki/transformations/data_offload/tests/test_field_api.py index b0ea66fc0..bb5f322cc 100644 --- a/loki/transformations/data_offload/tests/test_field_api.py +++ b/loki/transformations/data_offload/tests/test_field_api.py @@ -10,7 +10,7 @@ from loki import Sourcefile, Module import loki.expression.symbols as sym from loki.frontend import available_frontends -from loki.ir import FindNodes, Pragma, CallStatement +from loki.ir import nodes as ir, FindNodes, Pragma, CallStatement from loki.logging import log_levels from loki.transformations import FieldOffloadTransformation @@ -68,7 +68,9 @@ def fixture_state_module(tmp_path, parkind_mod, field_module, frontend): # pyli type state_type real(kind=jprb), dimension(10,10), pointer :: a, b, c + real(kind=jprb), pointer :: d(10,10,10) class(field_3rb), pointer :: f_a, f_b, f_c + class(field_4rb), pointer :: f_d contains procedure :: update_view => state_update_view end type state_type @@ -561,3 +563,74 @@ def test_field_offload_warnings(caplog, frontend, state_module, tmp_path): ' list of field wrapper types') in caplog.records[1].message assert ('[Loki] Data offload: The routine driver_routine already has a' + ' variable named loki_devptr_prefix_state_b') in caplog.records[2].message + + +@pytest.mark.parametrize('frontend', 
available_frontends()) +def test_field_offload_aliasing(frontend, state_module, tmp_path): + fcode = """ + module driver_mod + use state_mod, only: state_type + use parkind1, only: jprb + implicit none + + contains + + subroutine kernel_routine(nlon, nlev, a1, a2, a3) + integer, intent(in) :: nlon, nlev + real(kind=jprb), intent(in) :: a1(nlon) + real(kind=jprb), intent(inout) :: a2(nlon) + real(kind=jprb), intent(out) :: a3(nlon) + integer :: i + + do i=1, nlon + a1(i) = a2(i) + 0.1 + a3(i) = 0.1 + end do + end subroutine kernel_routine + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i + + !$loki data + do i=1,nlev + call state%update_view(i) + call kernel_routine(nlon, nlev, state%a(:,1), state%a(:,2), state%a(:,3)) + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) + driver = driver_mod['driver_routine'] + + field_offload = FieldOffloadTransformation( + devptr_prefix='', offload_index='i', field_group_types=['state_type'] + ) + driver.apply(field_offload, role='driver', targets=['kernel_routine']) + + calls = FindNodes(ir.CallStatement).visit(driver.body) + kernel_call = next(c for c in calls if c.name=='kernel_routine') + + assert 'state_a' in driver.variable_map + assert driver.variable_map['state_a'].type.shape == (':', ':', ':') + + assert kernel_call.arguments[:2] == ('nlon', 'nlev') + assert kernel_call.arguments[2] == 'state_a(:,1,i)' + assert kernel_call.arguments[3] == 'state_a(:,2,i)' + assert kernel_call.arguments[4] == 'state_a(:,3,i)' + + assert len(calls) == 3 + assert calls[0].name == 'state%f_a%get_device_data_rdwr' + assert calls[0].arguments == ('state_a',) + assert calls[1] == kernel_call + assert calls[2].name == 'state%f_a%sync_host_rdwr' + assert calls[2].arguments == () + + decls = 
FindNodes(ir.VariableDeclaration).visit(driver.spec) + assert len(decls) == 4 + assert decls[-1].symbols == ('state_a(:,:,:)',) From 882f2283f59eec37b161fbc0a3a69f65c25aae30 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 28 Nov 2024 08:03:13 +0000 Subject: [PATCH 06/15] FieldAPI: Move utility classes and methods to dedicated sub-package --- loki/transformations/__init__.py | 1 + .../transformations/data_offload/field_api.py | 80 +-------- loki/transformations/field_api.py | 170 ++++++++++++++++++ loki/transformations/parallel/field_api.py | 88 +-------- .../parallel/tests/test_field_api.py | 11 +- 5 files changed, 188 insertions(+), 162 deletions(-) create mode 100644 loki/transformations/field_api.py diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 236607132..59e5d2bd3 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -18,6 +18,7 @@ from loki.transformations.data_offload import * # noqa from loki.transformations.drhook import * # noqa from loki.transformations.extract import * # noqa +from loki.transformations.field_api import * # noqa from loki.transformations.hoist_variables import * # noqa from loki.transformations.idempotence import * # noqa from loki.transformations.inline import * # noqa diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_api.py index b61e99fcf..fedcbe39a 100644 --- a/loki/transformations/data_offload/field_api.py +++ b/loki/transformations/data_offload/field_api.py @@ -19,16 +19,18 @@ from loki.types import BasicType from loki.transformations.data_offload.offload import DataOffloadTransformation -from loki.transformations.parallel import ( - FieldAPITransferType, field_get_device_data, field_sync_host, - remove_field_api_view_updates +from loki.transformations.field_api import ( + FieldAPITransferType, FieldPointerMap, field_get_device_data, + field_sync_host ) +from loki.transformations.parallel import 
remove_field_api_view_updates + __all__ = [ - 'FieldOffloadTransformation', 'FieldPointerMap', - 'find_target_calls', 'find_offload_variables', - 'add_field_offload_calls', 'replace_kernel_args' + 'FieldOffloadTransformation', 'find_target_calls', + 'find_offload_variables', 'add_field_offload_calls', + 'replace_kernel_args' ] @@ -87,72 +89,6 @@ def process_driver(self, driver, targets): replace_kernel_args(driver, kernel_calls, offload_map, self.offload_index) -class FieldPointerMap: - """ - Helper class to map FIELD API pointers to intents and access descriptors. - - This utility is used to store arrays passed to target kernel calls - and the corresponding device pointers added by the transformation. - - The pointer/array variable pairs are exposed through the class - properties, based on the intent of the kernel argument. - """ - def __init__(self, devptrs, inargs, inoutargs, outargs): - self.inargs = inargs - self.inoutargs = inoutargs - self.outargs = outargs - self.devptrs = devptrs - - - @property - def in_pairs(self): - """ - Iterator that yields array/pointer pairs for kernel arguments of intent(in). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - for i, inarg in enumerate(self.inargs): - yield inarg, self.devptrs[i] - - @property - def inout_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(inout). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. - """ - start = len(self.inargs) - for i, inoutarg in enumerate(self.inoutargs): - yield inoutarg, self.devptrs[i+start] - - @property - def out_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(out) - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. 
- """ - - start = len(self.inargs)+len(self.inoutargs) - for i, outarg in enumerate(self.outargs): - yield outarg, self.devptrs[i+start] - - def find_target_calls(region, targets): """ Returns a list of all calls to targets inside the region. diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py new file mode 100644 index 000000000..b7cb87c08 --- /dev/null +++ b/loki/transformations/field_api.py @@ -0,0 +1,170 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +A set utility classes for dealing with FIELD API boilerplate in +parallel kernels and offload regions. +""" + +from enum import Enum + +from loki.expression import symbols as sym +from loki.ir import nodes as ir +from loki.scope import Scope + + +__all__ = [ + 'FieldAPITransferType', 'FieldPointerMap', 'get_field_type', + 'field_get_device_data', 'field_sync_host' +] + + +class FieldAPITransferType(Enum): + READ_ONLY = 1 + READ_WRITE = 2 + WRITE_ONLY = 3 + + +class FieldPointerMap: + """ + Helper class to map FIELD API pointers to intents and access descriptors. + + This utility is used to store arrays passed to target kernel calls + and the corresponding device pointers added by the transformation. + + The pointer/array variable pairs are exposed through the class + properties, based on the intent of the kernel argument. + """ + def __init__(self, devptrs, inargs, inoutargs, outargs): + self.inargs = inargs + self.inoutargs = inoutargs + self.outargs = outargs + self.devptrs = devptrs + + + @property + def in_pairs(self): + """ + Iterator that yields array/pointer pairs for kernel arguments of intent(in). 
+ + Yields + ______ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ + for i, inarg in enumerate(self.inargs): + yield inarg, self.devptrs[i] + + @property + def inout_pairs(self): + """ + Iterator that yields array/pointer pairs for arguments with intent(inout). + + Yields + ______ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ + start = len(self.inargs) + for i, inoutarg in enumerate(self.inoutargs): + yield inoutarg, self.devptrs[i+start] + + @property + def out_pairs(self): + """ + Iterator that yields array/pointer pairs for arguments with intent(out) + + Yields + ______ + :any:`Array` + Original kernel call argument + :any:`Array` + Corresponding device pointer added by the transformation. + """ + + start = len(self.inargs)+len(self.inoutargs) + for i, outarg in enumerate(self.outargs): + yield outarg, self.devptrs[i+start] + + +def get_field_type(a: sym.Array) -> sym.DerivedType: + """ + Returns the corresponding FIELD API type for an array. + + This function is IFS specific and assumes that the + type is an array declared with one of the IFS type specifiers, e.g. KIND=JPRB + """ + type_map = ["jprb", + "jpit", + "jpis", + "jpim", + "jpib", + "jpia", + "jprt", + "jprs", + "jprm", + "jprd", + "jplm"] + type_name = a.type.kind.name + + assert type_name.lower() in type_map, ('Error array type kind is: ' + f'"{type_name}" which is not a valid IFS type specifier') + rank = len(a.shape) + field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4].lower()) + return field_type + + +def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): + """ + Utility function to generate a :any:`CallStatement` corresponding to a Field API + ``GET_DEVICE_DATA`` call. 
+ + Parameters + ---------- + field_ptr: pointer to field object + Pointer to the field to call ``GET_DEVICE_DATA`` from. + dev_ptr: :any:`Array` + Device pointer array + transfer_type: :any:`FieldAPITransferType` + Field API transfer type to determine which ``GET_DEVICE_DATA`` method to call. + scope: :any:`Scope` + Scope of the created :any:`CallStatement` + """ + if not isinstance(transfer_type, FieldAPITransferType): + raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") + if transfer_type == FieldAPITransferType.READ_ONLY: + suffix = 'RDONLY' + elif transfer_type == FieldAPITransferType.READ_WRITE: + suffix = 'RDWR' + elif transfer_type == FieldAPITransferType.WRITE_ONLY: + suffix = 'WRONLY' + else: + suffix = '' + procedure_name = 'GET_DEVICE_DATA_' + suffix + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), + arguments=(dev_ptr.clone(dimensions=None),), ) + + +def field_sync_host(field_ptr, scope): + """ + Utility function to generate a :any:`CallStatement` corresponding to a Field API + ``SYNC_HOST`` call. + + Parameters + ---------- + field_ptr: pointer to field object + Pointer to the field to call ``SYNC_HOST`` from. + scope: :any:`Scope` + Scope of the created :any:`CallStatement` + """ + + procedure_name = 'SYNC_HOST_RDWR' + return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_api.py index 4b5705770..e1e06b491 100644 --- a/loki/transformations/parallel/field_api.py +++ b/loki/transformations/parallel/field_api.py @@ -9,18 +9,16 @@ Transformation utilities to manage and inject FIELD-API boilerplate code. 
""" -from enum import Enum from loki.expression import symbols as sym from loki.ir import ( nodes as ir, FindNodes, FindVariables, Transformer ) -from loki.scope import Scope from loki.logging import warning from loki.tools import as_tuple + __all__ = [ - 'remove_field_api_view_updates', 'add_field_api_view_updates', 'get_field_type', - 'field_get_device_data', 'field_sync_host', 'FieldAPITransferType' + 'remove_field_api_view_updates', 'add_field_api_view_updates' ] @@ -152,85 +150,3 @@ def visit_Loop(self, loop, **kwargs): # pylint: disable=unused-argument return loop routine.body = InsertFieldAPIViewsTransformer().visit(routine.body, scope=routine) - - -def get_field_type(a: sym.Array) -> sym.DerivedType: - """ - Returns the corresponding FIELD API type for an array. - - This function is IFS specific and assumes that the - type is an array declared with one of the IFS type specifiers, e.g. KIND=JPRB - """ - type_map = ["jprb", - "jpit", - "jpis", - "jpim", - "jpib", - "jpia", - "jprt", - "jprs", - "jprm", - "jprd", - "jplm"] - type_name = a.type.kind.name - - assert type_name.lower() in type_map, ('Error array type kind is: ' - f'"{type_name}" which is not a valid IFS type specifier') - rank = len(a.shape) - field_type = sym.DerivedType(name="field_" + str(rank) + type_name[2:4].lower()) - return field_type - - - -class FieldAPITransferType(Enum): - READ_ONLY = 1 - READ_WRITE = 2 - WRITE_ONLY = 3 - - -def field_get_device_data(field_ptr, dev_ptr, transfer_type: FieldAPITransferType, scope: Scope): - """ - Utility function to generate a :any:`CallStatement` corresponding to a Field API - ``GET_DEVICE_DATA`` call. - - Parameters - ---------- - field_ptr: pointer to field object - Pointer to the field to call ``GET_DEVICE_DATA`` from. - dev_ptr: :any:`Array` - Device pointer array - transfer_type: :any:`FieldAPITransferType` - Field API transfer type to determine which ``GET_DEVICE_DATA`` method to call. 
- scope: :any:`Scope` - Scope of the created :any:`CallStatement` - """ - if not isinstance(transfer_type, FieldAPITransferType): - raise TypeError(f"transfer_type must be of type FieldAPITransferType, but is of type {type(transfer_type)}") - if transfer_type == FieldAPITransferType.READ_ONLY: - suffix = 'RDONLY' - elif transfer_type == FieldAPITransferType.READ_WRITE: - suffix = 'RDWR' - elif transfer_type == FieldAPITransferType.WRITE_ONLY: - suffix = 'WRONLY' - else: - suffix = '' - procedure_name = 'GET_DEVICE_DATA_' + suffix - return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), - arguments=(dev_ptr.clone(dimensions=None),), ) - - -def field_sync_host(field_ptr, scope): - """ - Utility function to generate a :any:`CallStatement` corresponding to a Field API - ``SYNC_HOST`` call. - - Parameters - ---------- - field_ptr: pointer to field object - Pointer to the field to call ``SYNC_HOST`` from. - scope: :any:`Scope` - Scope of the created :any:`CallStatement` - """ - - procedure_name = 'SYNC_HOST_RDWR' - return ir.CallStatement(name=sym.ProcedureSymbol(procedure_name, parent=field_ptr, scope=scope), arguments=()) diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_api.py index 267c43e91..3e56a19e9 100644 --- a/loki/transformations/parallel/tests/test_field_api.py +++ b/loki/transformations/parallel/tests/test_field_api.py @@ -12,13 +12,16 @@ from loki.ir import nodes as ir, FindNodes from loki.expression import symbols as sym from loki.scope import Scope -from loki.transformations.parallel import ( - remove_field_api_view_updates, add_field_api_view_updates, - get_field_type, field_get_device_data, FieldAPITransferType -) from loki.types import BasicType, SymbolAttributes from loki.logging import WARNING +from loki.transformations.field_api import ( + get_field_type, field_get_device_data, FieldAPITransferType +) +from loki.transformations.parallel import 
( + remove_field_api_view_updates, add_field_api_view_updates +) + @pytest.mark.parametrize('frontend', available_frontends( skip=[(OMNI, 'OMNI needs full type definitions for derived types')] From de3c788e104375e029937546081b1ae6c1a7e467 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 28 Nov 2024 08:10:09 +0000 Subject: [PATCH 07/15] FieldAPI: Rename Field API modules in `parallel` and `data_offload` --- loki/transformations/data_offload/__init__.py | 2 +- .../data_offload/{field_api.py => field_offload.py} | 0 .../tests/{test_field_api.py => test_field_offload.py} | 0 loki/transformations/parallel/__init__.py | 2 +- loki/transformations/parallel/{field_api.py => field_views.py} | 0 .../parallel/tests/{test_field_api.py => test_field_views.py} | 0 6 files changed, 2 insertions(+), 2 deletions(-) rename loki/transformations/data_offload/{field_api.py => field_offload.py} (100%) rename loki/transformations/data_offload/tests/{test_field_api.py => test_field_offload.py} (100%) rename loki/transformations/parallel/{field_api.py => field_views.py} (100%) rename loki/transformations/parallel/tests/{test_field_api.py => test_field_views.py} (100%) diff --git a/loki/transformations/data_offload/__init__.py b/loki/transformations/data_offload/__init__.py index b6046c746..602f83226 100644 --- a/loki/transformations/data_offload/__init__.py +++ b/loki/transformations/data_offload/__init__.py @@ -8,6 +8,6 @@ Sub-package providing data offload transformations. 
""" -from loki.transformations.data_offload.field_api import * # noqa +from loki.transformations.data_offload.field_offload import * # noqa from loki.transformations.data_offload.global_var import * # noqa from loki.transformations.data_offload.offload import * # noqa diff --git a/loki/transformations/data_offload/field_api.py b/loki/transformations/data_offload/field_offload.py similarity index 100% rename from loki/transformations/data_offload/field_api.py rename to loki/transformations/data_offload/field_offload.py diff --git a/loki/transformations/data_offload/tests/test_field_api.py b/loki/transformations/data_offload/tests/test_field_offload.py similarity index 100% rename from loki/transformations/data_offload/tests/test_field_api.py rename to loki/transformations/data_offload/tests/test_field_offload.py diff --git a/loki/transformations/parallel/__init__.py b/loki/transformations/parallel/__init__.py index 0071a9814..07bcacd7a 100644 --- a/loki/transformations/parallel/__init__.py +++ b/loki/transformations/parallel/__init__.py @@ -11,5 +11,5 @@ """ from loki.transformations.parallel.block_loop import * # noqa -from loki.transformations.parallel.field_api import * # noqa +from loki.transformations.parallel.field_views import * # noqa from loki.transformations.parallel.openmp_region import * # noqa diff --git a/loki/transformations/parallel/field_api.py b/loki/transformations/parallel/field_views.py similarity index 100% rename from loki/transformations/parallel/field_api.py rename to loki/transformations/parallel/field_views.py diff --git a/loki/transformations/parallel/tests/test_field_api.py b/loki/transformations/parallel/tests/test_field_views.py similarity index 100% rename from loki/transformations/parallel/tests/test_field_api.py rename to loki/transformations/parallel/tests/test_field_views.py From b0aa69aaf8617d580f72472290e441b7fda41a75 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 4 Dec 2024 07:53:57 +0000 Subject: [PATCH 08/15] 
FieldAPI: Internalise data pointer generation to FieldPointerMapp --- .../data_offload/field_offload.py | 29 +++++++------------ loki/transformations/field_api.py | 28 ++++++++++++++---- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index fedcbe39a..bd51725b6 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -83,8 +83,8 @@ def process_driver(self, driver, targets): continue kernel_calls = find_target_calls(region, targets) offload_variables = find_offload_variables(driver, kernel_calls, self.field_group_types) - device_ptrs = declare_device_ptrs(driver, offload_variables, self.deviceptr_prefix) - offload_map = FieldPointerMap(device_ptrs, *offload_variables) + offload_map = FieldPointerMap(*offload_variables, ptr_prefix=self.deviceptr_prefix) + declare_device_ptrs(driver, deviceptrs=offload_map.dataptrs) add_field_offload_calls(driver, region, offload_map) replace_kernel_args(driver, kernel_calls, offload_map, self.offload_index) @@ -148,25 +148,16 @@ def find_offload_variables(driver, calls, field_group_types): return inargs, inoutargs, outargs -def declare_device_ptrs(driver, offload_variables, deviceptr_prefix='loki_devptr_'): - - def _devptr_from_array(driver, a: sym.Array): - """ - Returns a contiguous pointer :any:`Variable` with types matching the array a - """ - shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) - devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) - base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) - devptr_name = deviceptr_prefix + base_name - if devptr_name in driver.variable_map: +def declare_device_ptrs(driver, deviceptrs): + """ + Add a set of data pointer declarations to a given :any:`Subroutine` + """ + for devptr in deviceptrs: + if devptr.name in driver.variable_map: 
warning(f'[Loki] Data offload: The routine {driver.name} already has a ' + - f'variable named {devptr_name}') - devptr = sym.Variable(name=devptr_name, type=devptr_type, dimensions=shape) - return devptr + f'variable named {devptr.name}') - device_ptrs = tuple(_devptr_from_array(driver, a) for a in chain(*offload_variables)) - driver.variables += device_ptrs - return device_ptrs + driver.variables += deviceptrs def add_field_offload_calls(driver, region, offload_map): diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py index b7cb87c08..b5097fdd3 100644 --- a/loki/transformations/field_api.py +++ b/loki/transformations/field_api.py @@ -11,6 +11,7 @@ """ from enum import Enum +from itertools import chain from loki.expression import symbols as sym from loki.ir import nodes as ir @@ -39,12 +40,29 @@ class FieldPointerMap: The pointer/array variable pairs are exposed through the class properties, based on the intent of the kernel argument. """ - def __init__(self, devptrs, inargs, inoutargs, outargs): + def __init__(self, inargs, inoutargs, outargs, ptr_prefix='loki_devptr_'): self.inargs = inargs self.inoutargs = inoutargs self.outargs = outargs - self.devptrs = devptrs + self.ptr_prefix = ptr_prefix + + def dataptr_from_array(self, a: sym.Array): + """ + Returns a contiguous pointer :any:`Variable` with types matching the array :data:`a`. 
+ """ + shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) + devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) + base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) + return sym.Variable(name=self.ptr_prefix + base_name, type=devptr_type, dimensions=shape) + + @property + def dataptrs(self): + """ Create a list of contiguous data pointer symbols """ + return tuple( + self.dataptr_from_array(a) + for a in chain(*(self.inargs, self.inoutargs, self.outargs)) + ) @property def in_pairs(self): @@ -59,7 +77,7 @@ def in_pairs(self): Corresponding device pointer added by the transformation. """ for i, inarg in enumerate(self.inargs): - yield inarg, self.devptrs[i] + yield inarg, self.dataptrs[i] @property def inout_pairs(self): @@ -75,7 +93,7 @@ def inout_pairs(self): """ start = len(self.inargs) for i, inoutarg in enumerate(self.inoutargs): - yield inoutarg, self.devptrs[i+start] + yield inoutarg, self.dataptrs[i+start] @property def out_pairs(self): @@ -92,7 +110,7 @@ def out_pairs(self): start = len(self.inargs)+len(self.inoutargs) for i, outarg in enumerate(self.outargs): - yield outarg, self.devptrs[i+start] + yield outarg, self.dataptrs[i+start] def get_field_type(a: sym.Array) -> sym.DerivedType: From b145e4d9d1eb00bf16c863b9c54c9829a37ac2e3 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 4 Dec 2024 08:56:30 +0000 Subject: [PATCH 09/15] FieldAPI: Internalise data transfer call generation and drop pairs --- .../data_offload/field_offload.py | 29 +++---- loki/transformations/field_api.py | 75 +++++++++---------- 2 files changed, 44 insertions(+), 60 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index bd51725b6..a5d1e24f1 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -19,10 +19,7 @@ from loki.types import BasicType from 
loki.transformations.data_offload.offload import DataOffloadTransformation -from loki.transformations.field_api import ( - FieldAPITransferType, FieldPointerMap, field_get_device_data, - field_sync_host -) +from loki.transformations.field_api import FieldPointerMap from loki.transformations.parallel import remove_field_api_view_updates @@ -83,7 +80,9 @@ def process_driver(self, driver, targets): continue kernel_calls = find_target_calls(region, targets) offload_variables = find_offload_variables(driver, kernel_calls, self.field_group_types) - offload_map = FieldPointerMap(*offload_variables, ptr_prefix=self.deviceptr_prefix) + offload_map = FieldPointerMap( + *offload_variables, scope=driver, ptr_prefix=self.deviceptr_prefix + ) declare_device_ptrs(driver, deviceptrs=offload_map.dataptrs) add_field_offload_calls(driver, region, offload_map) replace_kernel_args(driver, kernel_calls, offload_map, self.offload_index) @@ -162,27 +161,17 @@ def declare_device_ptrs(driver, deviceptrs): def add_field_offload_calls(driver, region, offload_map): - def _get_field_ptr_from_view(field_view): - type_chain = field_view.name.split('%') - field_type_name = 'F_' + type_chain[-1] - return field_view.parent.get_derived_type_member(field_type_name) - - host_to_device = tuple(field_get_device_data(_get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_ONLY, driver) for inarg, devptr in offload_map.in_pairs) - host_to_device += tuple(field_get_device_data(_get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.inout_pairs) - host_to_device += tuple(field_get_device_data(_get_field_ptr_from_view(inarg), devptr, - FieldAPITransferType.READ_WRITE, driver) for inarg, devptr in offload_map.out_pairs) - device_to_host = tuple(field_sync_host(_get_field_ptr_from_view(inarg), driver) - for inarg, _ in chain(offload_map.inout_pairs, offload_map.out_pairs)) - update_map = {region: host_to_device + (region,) + 
device_to_host} + update_map = { + region: offload_map.host_to_device_calls + (region,) + offload_map.sync_host_calls + } Transformer(update_map, inplace=True).visit(driver.body) def replace_kernel_args(driver, kernel_calls, offload_map, offload_index): change_map = {} offload_idx_expr = driver.variable_map[offload_index] - for arg, devptr in chain(offload_map.in_pairs, offload_map.inout_pairs, offload_map.out_pairs): + for arg in chain(offload_map.inargs, offload_map.inoutargs, offload_map.outargs): + devptr = offload_map.dataptr_from_array(arg) if len(arg.dimensions) != 0: dims = arg.dimensions + (offload_idx_expr,) else: diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py index b5097fdd3..826c6fb71 100644 --- a/loki/transformations/field_api.py +++ b/loki/transformations/field_api.py @@ -40,11 +40,13 @@ class FieldPointerMap: The pointer/array variable pairs are exposed through the class properties, based on the intent of the kernel argument. """ - def __init__(self, inargs, inoutargs, outargs, ptr_prefix='loki_devptr_'): + def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_devptr_'): self.inargs = inargs self.inoutargs = inoutargs self.outargs = outargs + self.scope = scope + self.ptr_prefix = ptr_prefix def dataptr_from_array(self, a: sym.Array): @@ -56,6 +58,15 @@ def dataptr_from_array(self, a: sym.Array): base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) return sym.Variable(name=self.ptr_prefix + base_name, type=devptr_type, dimensions=shape) + @staticmethod + def field_ptr_from_view(field_view): + """ + Returns a symbol for the pointer to the corresponding Field object. 
+ """ + type_chain = field_view.name.split('%') + field_type_name = 'F_' + type_chain[-1] + return field_view.parent.get_derived_type_member(field_type_name) + @property def dataptrs(self): """ Create a list of contiguous data pointer symbols """ @@ -65,52 +76,36 @@ def dataptrs(self): ) @property - def in_pairs(self): + def host_to_device_calls(self): """ - Iterator that yields array/pointer pairs for kernel arguments of intent(in). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. + Returns a tuple of :any:`CallStatement` for host-to-device transfers on fields. """ - for i, inarg in enumerate(self.inargs): - yield inarg, self.dataptrs[i] + READ_ONLY, READ_WRITE = FieldAPITransferType.READ_ONLY, FieldAPITransferType.READ_WRITE - @property - def inout_pairs(self): - """ - Iterator that yields array/pointer pairs for arguments with intent(inout). - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. 
- """ - start = len(self.inargs) - for i, inoutarg in enumerate(self.inoutargs): - yield inoutarg, self.dataptrs[i+start] + host_to_device = tuple(field_get_device_data( + self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_ONLY, scope=self.scope + ) for arg in self.inargs) + host_to_device += tuple(field_get_device_data( + self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope + ) for arg in self.inoutargs) + host_to_device += tuple(field_get_device_data( + self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope + ) for arg in self.outargs) + + return host_to_device @property - def out_pairs(self): + def sync_host_calls(self): """ - Iterator that yields array/pointer pairs for arguments with intent(out) - - Yields - ______ - :any:`Array` - Original kernel call argument - :any:`Array` - Corresponding device pointer added by the transformation. + Returns a tuple of :any:`CallStatement` for host-synchronization transfers on fields. 
""" - - start = len(self.inargs)+len(self.inoutargs) - for i, outarg in enumerate(self.outargs): - yield outarg, self.dataptrs[i+start] + sync_host = tuple( + field_sync_host(self.field_ptr_from_view(arg), scope=self.scope) for arg in self.inoutargs + ) + sync_host += tuple( + field_sync_host(self.field_ptr_from_view(arg), scope=self.scope) for arg in self.outargs + ) + return sync_host def get_field_type(a: sym.Array) -> sym.DerivedType: From 2e4647676f5722b5a0ecbaf5a1cb182d3e8137d1 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 4 Dec 2024 13:13:52 +0000 Subject: [PATCH 10/15] FieldAPI: Filter duplicates in FieldPointerMap when generating calls --- .../data_offload/field_offload.py | 9 ------ loki/transformations/field_api.py | 32 +++++++++++++------ 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index a5d1e24f1..d09d77b98 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -135,15 +135,6 @@ def find_offload_variables(driver, calls, field_group_types): if param.type.intent.lower() == 'out': outargs += (arg, ) - inoutargs += tuple(v for v in inargs if v in outargs) - inargs = tuple(v for v in inargs if v not in inoutargs) - outargs = tuple(v for v in outargs if v not in inoutargs) - - # Filter out duplicates and return as tuple - inargs = tuple(dict.fromkeys(inargs)) - inoutargs = tuple(dict.fromkeys(inoutargs)) - outargs = tuple(dict.fromkeys(outargs)) - return inargs, inoutargs, outargs diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py index 826c6fb71..43f9b8935 100644 --- a/loki/transformations/field_api.py +++ b/loki/transformations/field_api.py @@ -41,9 +41,15 @@ class FieldPointerMap: properties, based on the intent of the kernel argument. 
""" def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_devptr_'): - self.inargs = inargs - self.inoutargs = inoutargs - self.outargs = outargs + # Ensure no duplication between in/inout/out args + inoutargs += tuple(v for v in inargs if v in outargs) + inargs = tuple(v for v in inargs if v not in inoutargs) + outargs = tuple(v for v in outargs if v not in inoutargs) + + # Filter out duplicates and return as tuple + self.inargs = tuple(dict.fromkeys(inargs)) + self.inoutargs = tuple(dict.fromkeys(inoutargs)) + self.outargs = tuple(dict.fromkeys(outargs)) self.scope = scope @@ -70,10 +76,10 @@ def field_ptr_from_view(field_view): @property def dataptrs(self): """ Create a list of contiguous data pointer symbols """ - return tuple( + return tuple(dict.fromkeys( self.dataptr_from_array(a) for a in chain(*(self.inargs, self.inoutargs, self.outargs)) - ) + )) @property def host_to_device_calls(self): @@ -82,17 +88,23 @@ def host_to_device_calls(self): """ READ_ONLY, READ_WRITE = FieldAPITransferType.READ_ONLY, FieldAPITransferType.READ_WRITE + # Filter down to base symbols and avoid duplicates across sets + inargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in self.inargs)) + inoutargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in self.inoutargs)) + outargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in self.outargs)) + inargs = tuple(a for a in inargs if a not in inoutargs) + host_to_device = tuple(field_get_device_data( self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_ONLY, scope=self.scope - ) for arg in self.inargs) + ) for arg in inargs) host_to_device += tuple(field_get_device_data( self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope - ) for arg in self.inoutargs) + ) for arg in inoutargs) host_to_device += tuple(field_get_device_data( self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope - ) for arg in self.outargs) + ) for arg 
in outargs) - return host_to_device + return tuple(dict.fromkeys(host_to_device)) @property def sync_host_calls(self): @@ -105,7 +117,7 @@ def sync_host_calls(self): sync_host += tuple( field_sync_host(self.field_ptr_from_view(arg), scope=self.scope) for arg in self.outargs ) - return sync_host + return tuple(dict.fromkeys(sync_host)) def get_field_type(a: sym.Array) -> sym.DerivedType: From 35044a1982f8b997abcd14b804987f8e385f10d4 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 5 Dec 2024 09:55:43 +0000 Subject: [PATCH 11/15] DataOffload: Apply symbol substitution over driver in field offload Instead of subbing just on calls, we apply the remapping over the whole routine body. --- .../data_offload/field_offload.py | 17 ++++++++------- .../data_offload/tests/test_field_offload.py | 4 ++-- loki/transformations/field_api.py | 21 ++++++++----------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index d09d77b98..74bb6c6ff 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -12,7 +12,7 @@ from loki.ir import ( FindNodes, PragmaRegion, CallStatement, Transformer, pragma_regions_attached, - SubstituteExpressions + SubstituteExpressions, FindVariables ) from loki.logging import warning, error from loki.tools import as_tuple @@ -85,7 +85,7 @@ def process_driver(self, driver, targets): ) declare_device_ptrs(driver, deviceptrs=offload_map.dataptrs) add_field_offload_calls(driver, region, offload_map) - replace_kernel_args(driver, kernel_calls, offload_map, self.offload_index) + replace_kernel_args(driver, offload_map, self.offload_index) def find_target_calls(region, targets): @@ -158,10 +158,15 @@ def add_field_offload_calls(driver, region, offload_map): Transformer(update_map, inplace=True).visit(driver.body) -def replace_kernel_args(driver, kernel_calls, offload_map, 
offload_index): +def replace_kernel_args(driver, offload_map, offload_index): change_map = {} offload_idx_expr = driver.variable_map[offload_index] - for arg in chain(offload_map.inargs, offload_map.inoutargs, offload_map.outargs): + + args = tuple(chain(offload_map.inargs, offload_map.inoutargs, offload_map.outargs)) + for arg in FindVariables().visit(driver.body): + if not arg.name in args: + continue + devptr = offload_map.dataptr_from_array(arg) if len(arg.dimensions) != 0: dims = arg.dimensions + (offload_idx_expr,) @@ -169,6 +174,4 @@ def replace_kernel_args(driver, kernel_calls, offload_map, offload_index): dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) change_map[arg] = devptr.clone(dimensions=dims) - arg_transformer = SubstituteExpressions(change_map, inplace=True) - for call in kernel_calls: - arg_transformer.visit(call) + driver.body = SubstituteExpressions(change_map, inplace=True).visit(driver.body) diff --git a/loki/transformations/data_offload/tests/test_field_offload.py b/loki/transformations/data_offload/tests/test_field_offload.py index bb5f322cc..f430fbc96 100644 --- a/loki/transformations/data_offload/tests/test_field_offload.py +++ b/loki/transformations/data_offload/tests/test_field_offload.py @@ -9,7 +9,7 @@ from loki import Sourcefile, Module import loki.expression.symbols as sym -from loki.frontend import available_frontends +from loki.frontend import available_frontends, OMNI from loki.ir import nodes as ir, FindNodes, Pragma, CallStatement from loki.logging import log_levels @@ -632,5 +632,5 @@ def test_field_offload_aliasing(frontend, state_module, tmp_path): assert calls[2].arguments == () decls = FindNodes(ir.VariableDeclaration).visit(driver.spec) - assert len(decls) == 4 + assert len(decls) == 5 if frontend == OMNI else 4 assert decls[-1].symbols == ('state_a(:,:,:)',) diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py index 43f9b8935..3d4ec5672 100644 --- 
a/loki/transformations/field_api.py +++ b/loki/transformations/field_api.py @@ -47,9 +47,12 @@ def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_devptr_') outargs = tuple(v for v in outargs if v not in inoutargs) # Filter out duplicates and return as tuple - self.inargs = tuple(dict.fromkeys(inargs)) - self.inoutargs = tuple(dict.fromkeys(inoutargs)) - self.outargs = tuple(dict.fromkeys(outargs)) + self.inargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in inargs)) + self.inoutargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in inoutargs)) + self.outargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in outargs)) + + # Filter out duplicates across argument tuples + self.inargs = tuple(a for a in self.inargs if a not in self.inoutargs) self.scope = scope @@ -88,21 +91,15 @@ def host_to_device_calls(self): """ READ_ONLY, READ_WRITE = FieldAPITransferType.READ_ONLY, FieldAPITransferType.READ_WRITE - # Filter down to base symbols and avoid duplicates across sets - inargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in self.inargs)) - inoutargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in self.inoutargs)) - outargs = tuple(dict.fromkeys(a.clone(dimensions=None) for a in self.outargs)) - inargs = tuple(a for a in inargs if a not in inoutargs) - host_to_device = tuple(field_get_device_data( self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_ONLY, scope=self.scope - ) for arg in inargs) + ) for arg in self.inargs) host_to_device += tuple(field_get_device_data( self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope - ) for arg in inoutargs) + ) for arg in self.inoutargs) host_to_device += tuple(field_get_device_data( self.field_ptr_from_view(arg), self.dataptr_from_array(arg), READ_WRITE, scope=self.scope - ) for arg in outargs) + ) for arg in self.outargs) return tuple(dict.fromkeys(host_to_device)) From 36ae874a66dcea936304b04afdd008a87372b674 Mon Sep 17 
00:00:00 2001 From: Michael Lange Date: Thu, 5 Dec 2024 14:19:33 +0000 Subject: [PATCH 12/15] DataOffload: Derive offload variables via dataflow analysis for regions --- .../data_offload/field_offload.py | 64 +++++++++++-------- .../data_offload/tests/test_field_offload.py | 61 ++++++++++++++++++ 2 files changed, 97 insertions(+), 28 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index 74bb6c6ff..ed7244573 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -7,18 +7,18 @@ from itertools import chain +from loki.analyse import dataflow_analysis_attached from loki.batch import Transformation from loki.expression import Array, symbols as sym from loki.ir import ( - FindNodes, PragmaRegion, CallStatement, - Transformer, pragma_regions_attached, - SubstituteExpressions, FindVariables + nodes as ir, FindNodes, PragmaRegion, CallStatement, Transformer, + pragma_regions_attached, SubstituteExpressions, FindVariables, + is_loki_pragma ) from loki.logging import warning, error from loki.tools import as_tuple from loki.types import BasicType -from loki.transformations.data_offload.offload import DataOffloadTransformation from loki.transformations.field_api import FieldPointerMap from loki.transformations.parallel import remove_field_api_view_updates @@ -72,20 +72,27 @@ def transform_subroutine(self, routine, **kwargs): self.process_driver(routine, targets) def process_driver(self, driver, targets): + + # Remove the Field-API view-pointer boilerplate remove_field_api_view_updates(driver, self.field_group_types) + with pragma_regions_attached(driver): - for region in FindNodes(PragmaRegion).visit(driver.body): - # Only work on active `!$loki data` regions - if not DataOffloadTransformation._is_active_loki_data_region(region, targets): - continue - kernel_calls = find_target_calls(region, targets) - offload_variables = 
find_offload_variables(driver, kernel_calls, self.field_group_types) - offload_map = FieldPointerMap( - *offload_variables, scope=driver, ptr_prefix=self.deviceptr_prefix - ) - declare_device_ptrs(driver, deviceptrs=offload_map.dataptrs) - add_field_offload_calls(driver, region, offload_map) - replace_kernel_args(driver, offload_map, self.offload_index) + with dataflow_analysis_attached(driver): + for region in FindNodes(PragmaRegion).visit(driver.body): + # Only work on active `!$loki data` regions + if not region.pragma or not is_loki_pragma(region.pragma, starts_with='data'): + continue + + # Determine the array variables for generating Field API offload + offload_variables = find_offload_variables(driver, region, self.field_group_types) + offload_map = FieldPointerMap( + *offload_variables, scope=driver, ptr_prefix=self.deviceptr_prefix + ) + + # Inject declarations and offload API calls into driver region + declare_device_ptrs(driver, deviceptrs=offload_map.dataptrs) + add_field_offload_calls(driver, region, offload_map) + replace_kernel_args(driver, offload_map, self.offload_index) def find_target_calls(region, targets): @@ -104,12 +111,20 @@ def find_target_calls(region, targets): return calls -def find_offload_variables(driver, calls, field_group_types): - inargs = () - inoutargs = () - outargs = () +def find_offload_variables(driver, region, field_group_types): + + # Use dataflow analysis to find in, out and inout variables to that region + inargs = region.uses_symbols - region.defines_symbols + inoutargs = region.uses_symbols & region.defines_symbols + outargs = region.defines_symbols - region.uses_symbols - for call in calls: + # Filter out relevant array symbols + inargs = tuple(a for a in inargs if isinstance(a, sym.Array) and a.parent) + inoutargs = tuple(a for a in inoutargs if isinstance(a, sym.Array) and a.parent) + outargs = tuple(a for a in outargs if isinstance(a, sym.Array) and a.parent) + + # Do some sanity checking and warning for enclosed 
calls + for call in FindNodes(ir.CallStatement).visit(region): if call.routine is BasicType.DEFERRED: error(f'[Loki] Data offload: Routine {driver.name} has not been enriched ' + f'in {str(call.name).lower()}') @@ -128,13 +143,6 @@ def find_offload_variables(driver, calls, field_group_types): + f' {driver.name} that is not wrapped by a Field API object') continue - if param.type.intent.lower() == 'in': - inargs += (arg, ) - if param.type.intent.lower() == 'inout': - inoutargs += (arg, ) - if param.type.intent.lower() == 'out': - outargs += (arg, ) - return inargs, inoutargs, outargs diff --git a/loki/transformations/data_offload/tests/test_field_offload.py b/loki/transformations/data_offload/tests/test_field_offload.py index f430fbc96..0067e4a7f 100644 --- a/loki/transformations/data_offload/tests/test_field_offload.py +++ b/loki/transformations/data_offload/tests/test_field_offload.py @@ -634,3 +634,64 @@ def test_field_offload_aliasing(frontend, state_module, tmp_path): decls = FindNodes(ir.VariableDeclaration).visit(driver.spec) assert len(decls) == 5 if frontend == OMNI else 4 assert decls[-1].symbols == ('state_a(:,:,:)',) + + +@pytest.mark.parametrize('frontend', available_frontends()) +def test_field_offload_driver_compute(frontend, state_module, tmp_path): + fcode = """ + module driver_mod + use state_mod, only: state_type + use parkind1, only: jprb + implicit none + + contains + + subroutine driver_routine(nlon, nlev, state) + integer, intent(in) :: nlon, nlev + type(state_type), intent(inout) :: state + integer :: i, ibl + + !$loki data + do ibl=1,nlev + call state%update_view(ibl) + + do i=1, nlon + state%a(i, 1) = state%b(i, 1) + 0.1 + state%a(i, 2) = state%a(i, 1) + end do + + end do + !$loki end data + + end subroutine driver_routine + end module driver_mod + """ + driver_mod = Module.from_source( + fcode, frontend=frontend, definitions=state_module, xmods=[tmp_path] + ) + driver = driver_mod['driver_routine'] + + calls = 
FindNodes(ir.CallStatement).visit(driver.body) + assert len(calls) == 1 + assert calls[0].name == 'state%update_view' + + field_offload = FieldOffloadTransformation( + devptr_prefix='', offload_index='ibl', field_group_types=['state_type'] + ) + driver.apply(field_offload, role='driver', targets=['kernel_routine']) + + calls = FindNodes(ir.CallStatement).visit(driver.body) + assert len(calls) == 3 + assert calls[0].name == 'state%f_b%get_device_data_rdonly' + assert calls[0].arguments == ('state_b',) + assert calls[1].name == 'state%f_a%get_device_data_rdwr' + assert calls[1].arguments == ('state_a',) + assert calls[2].name == 'state%f_a%sync_host_rdwr' + assert calls[2].arguments == () + + assigns = FindNodes(ir.Assignment).visit(driver.body) + assert len(assigns) == 2 + assert assigns[0].lhs == 'state_a(i,1,ibl)' + assert assigns[0].rhs == 'state_b(i,1,ibl) + 0.1' + assert assigns[1].lhs == 'state_a(i,2,ibl)' + assert assigns[1].rhs == 'state_a(i,1,ibl)' From 01ba6220eda8ba840101748e9c5e47d951880560 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 5 Dec 2024 14:31:01 +0000 Subject: [PATCH 13/15] DataOffload: Tidy up imports and remove obsolete utility and test Without tying the transformation to calls, explicit no-target skipping becomes virtually impossible; hence removing the test for it. 
--- .../data_offload/field_offload.py | 35 ++------ .../data_offload/tests/test_field_offload.py | 87 ------------------- 2 files changed, 7 insertions(+), 115 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index ed7244573..58dbd7a54 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -11,23 +11,19 @@ from loki.batch import Transformation from loki.expression import Array, symbols as sym from loki.ir import ( - nodes as ir, FindNodes, PragmaRegion, CallStatement, Transformer, - pragma_regions_attached, SubstituteExpressions, FindVariables, - is_loki_pragma + nodes as ir, FindNodes, FindVariables, Transformer, + SubstituteExpressions, pragma_regions_attached, is_loki_pragma ) from loki.logging import warning, error -from loki.tools import as_tuple from loki.types import BasicType from loki.transformations.field_api import FieldPointerMap from loki.transformations.parallel import remove_field_api_view_updates - __all__ = [ - 'FieldOffloadTransformation', 'find_target_calls', - 'find_offload_variables', 'add_field_offload_calls', - 'replace_kernel_args' + 'FieldOffloadTransformation', 'find_offload_variables', + 'add_field_offload_calls', 'replace_kernel_args' ] @@ -67,18 +63,17 @@ def __init__(self, devptr_prefix=None, field_group_types=None, offload_index=Non def transform_subroutine(self, routine, **kwargs): role = kwargs['role'] - targets = as_tuple(kwargs.get('targets'), (None)) if role == 'driver': - self.process_driver(routine, targets) + self.process_driver(routine) - def process_driver(self, driver, targets): + def process_driver(self, driver): # Remove the Field-API view-pointer boilerplate remove_field_api_view_updates(driver, self.field_group_types) with pragma_regions_attached(driver): with dataflow_analysis_attached(driver): - for region in FindNodes(PragmaRegion).visit(driver.body): + for region in 
FindNodes(ir.PragmaRegion).visit(driver.body): # Only work on active `!$loki data` regions if not region.pragma or not is_loki_pragma(region.pragma, starts_with='data'): continue @@ -95,22 +90,6 @@ def process_driver(self, driver, targets): replace_kernel_args(driver, offload_map, self.offload_index) -def find_target_calls(region, targets): - """ - Returns a list of all calls to targets inside the region. - - Parameters - ---------- - :region: :any:`PragmaRegion` - :targets: collection of :any:`Subroutine` - Iterable object of subroutines or functions called - :returns: list of :any:`CallStatement` - """ - calls = FindNodes(CallStatement).visit(region) - calls = [c for c in calls if str(c.name).lower() in targets] - return calls - - def find_offload_variables(driver, region, field_group_types): # Use dataflow analysis to find in, out and inout variables to that region diff --git a/loki/transformations/data_offload/tests/test_field_offload.py b/loki/transformations/data_offload/tests/test_field_offload.py index 0067e4a7f..f1bdb3608 100644 --- a/loki/transformations/data_offload/tests/test_field_offload.py +++ b/loki/transformations/data_offload/tests/test_field_offload.py @@ -315,93 +315,6 @@ def test_field_offload_multiple_calls(frontend, state_module, tmp_path): assert devptr.name in (arg.name for kernel_call in kernel_calls for arg in kernel_call.arguments) -@pytest.mark.parametrize('frontend', available_frontends()) -def test_field_offload_no_targets(frontend, state_module, tmp_path): - fother = """ - module another_module - implicit none - contains - subroutine another_kernel(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real, intent(in) :: a(nlon,nlev) - real, intent(inout) :: b(nlon,nlev) - real, intent(out) :: c(nlon,nlev) - integer :: i, j - end subroutine - end module - """ - - fcode = """ - module driver_mod - use parkind1, only: jprb - use state_mod, only: state_type - use another_module, only: another_kernel - - implicit none - - contains - 
- subroutine kernel_routine(nlon, nlev, a, b, c) - integer, intent(in) :: nlon, nlev - real(kind=jprb), intent(in) :: a(nlon,nlev) - real(kind=jprb), intent(inout) :: b(nlon,nlev) - real(kind=jprb), intent(out) :: c(nlon,nlev) - integer :: i, j - - do j=1, nlon - do i=1, nlev - b(i,j) = a(i,j) + 0.1 - c(i,j) = 0.1 - end do - end do - end subroutine kernel_routine - - subroutine driver_routine(nlon, nlev, state) - integer, intent(in) :: nlon, nlev - type(state_type), intent(inout) :: state - integer :: i - - !$loki data - do i=1,nlev - call state%update_view(i) - call another_kernel(nlon, state%a, state%b, state%c) - end do - !$loki end data - - end subroutine driver_routine - end module driver_mod - """ - - Sourcefile.from_source(fother, frontend=frontend, xmods=[tmp_path]) - driver_mod = Module.from_source( - fcode, frontend=frontend, definitions=state_module,xmods=[tmp_path] - ) - driver = driver_mod['driver_routine'] - deviceptr_prefix = 'loki_devptr_prefix_' - driver.apply(FieldOffloadTransformation(devptr_prefix=deviceptr_prefix, - offload_index='i', - field_group_types=['state_type']), - role='driver', - targets=['kernel_routine']) - - calls = FindNodes(CallStatement).visit(driver.body) - assert not any(c for c in calls if c.name=='kernel_routine') - - # verify that no field offloads are generated - in_calls = [c for c in calls if 'get_device_data_rdonly' in c.name.name.lower()] - assert len(in_calls) == 0 - inout_calls = [c for c in calls if 'get_device_data_rdwr' in c.name.name.lower()] - assert len(inout_calls) == 0 - # verify that no field sync host calls are generated - sync_calls = [c for c in calls if 'sync_host_rdwr' in c.name.name.lower()] - assert len(sync_calls) == 0 - - # verify that data offload pragmas remain - pragmas = FindNodes(Pragma).visit(driver.body) - assert len(pragmas) == 2 - assert all(p.keyword=='loki' and p.content==c for p, c in zip(pragmas, ['data', 'end data'])) - - @pytest.mark.parametrize('frontend', available_frontends()) def 
test_field_offload_unknown_kernel(caplog, frontend, state_module, tmp_path): fother = """ From 25409e6e43cb249201f273d2594b03181371b413 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Mon, 6 Jan 2025 13:26:43 +0000 Subject: [PATCH 14/15] FieldAPI: Small updates to docstrings --- .../data_offload/field_offload.py | 25 ++++++++++++++++++- loki/transformations/field_api.py | 7 ++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index 58dbd7a54..e4c6b09e1 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -30,7 +30,9 @@ class FieldOffloadTransformation(Transformation): """ - Transformation to offload arrays owned by Field API fields to the device. **This transformation is IFS specific.** + Transformation to offload arrays owned by Field API fields to the device. + + **This transformation is IFS specific.** The transformation assumes that fields are wrapped in derived types specified in ``field_group_types`` and will only offload arrays that are members of such derived types. @@ -91,6 +93,27 @@ def process_driver(self, driver): def find_offload_variables(driver, region, field_group_types): + """ + Finds the sets of array variable symbols for which we can generate + Field API offload code. + + Note + ---- + This method requires Loki's dataflow analysis to be run on the + :data:`region` via :meth:`dataflow_analysis_attached`. + + Parameters + ---------- + region : :any:`PragmaRegion` + Code region object for which to determine offload variables + field_group_types : list or tuple of str, optional + Names of the field group types with members that may be offloaded (defaults to ``['']``). + + Returns + ------- + (inargs, inoutargs, outargs) : (tuple, tuple, tuple) + The sets of array symbols split into three tuples according to access type. 
+ """ # Use dataflow analysis to find in, out and inout variables to that region inargs = region.uses_symbols - region.defines_symbols diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py index 3d4ec5672..0bda7a380 100644 --- a/loki/transformations/field_api.py +++ b/loki/transformations/field_api.py @@ -6,7 +6,7 @@ # nor does it submit to any jurisdiction. """ -A set utility classes for dealing with FIELD API boilerplate in +A set of utility classes for dealing with FIELD API boilerplate in parallel kernels and offload regions. """ @@ -35,10 +35,7 @@ class FieldPointerMap: Helper class to map FIELD API pointers to intents and access descriptors. This utility is used to store arrays passed to target kernel calls - and the corresponding device pointers added by the transformation. - - The pointer/array variable pairs are exposed through the class - properties, based on the intent of the kernel argument. + and easily access corresponding device pointers added by the transformation. """ def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_devptr_'): # Ensure no duplication between in/inout/out args From cede76b6710b5d721d60e7e1f60a3a5d409f25e8 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Mon, 6 Jan 2025 14:13:31 +0000 Subject: [PATCH 15/15] FieldAPI: Add args property to FieldPointerMap and devptr->dataptr --- .../data_offload/field_offload.py | 10 ++++------ loki/transformations/field_api.py | 16 +++++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/loki/transformations/data_offload/field_offload.py b/loki/transformations/data_offload/field_offload.py index e4c6b09e1..3d6c3a8f1 100644 --- a/loki/transformations/data_offload/field_offload.py +++ b/loki/transformations/data_offload/field_offload.py @@ -5,8 +5,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
-from itertools import chain - from loki.analyse import dataflow_analysis_attached from loki.batch import Transformation from loki.expression import Array, symbols as sym @@ -172,16 +170,16 @@ def replace_kernel_args(driver, offload_map, offload_index): change_map = {} offload_idx_expr = driver.variable_map[offload_index] - args = tuple(chain(offload_map.inargs, offload_map.inoutargs, offload_map.outargs)) + args = offload_map.args for arg in FindVariables().visit(driver.body): if not arg.name in args: continue - devptr = offload_map.dataptr_from_array(arg) + dataptr = offload_map.dataptr_from_array(arg) if len(arg.dimensions) != 0: dims = arg.dimensions + (offload_idx_expr,) else: - dims = (sym.RangeIndex((None, None)),) * (len(devptr.shape)-1) + (offload_idx_expr,) - change_map[arg] = devptr.clone(dimensions=dims) + dims = (sym.RangeIndex((None, None)),) * (len(dataptr.shape)-1) + (offload_idx_expr,) + change_map[arg] = dataptr.clone(dimensions=dims) driver.body = SubstituteExpressions(change_map, inplace=True).visit(driver.body) diff --git a/loki/transformations/field_api.py b/loki/transformations/field_api.py index 0bda7a380..58f1d6f1a 100644 --- a/loki/transformations/field_api.py +++ b/loki/transformations/field_api.py @@ -37,7 +37,7 @@ class FieldPointerMap: This utility is used to store arrays passed to target kernel calls and easily access corresponding device pointers added by the transformation. """ - def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_devptr_'): + def __init__(self, inargs, inoutargs, outargs, scope, ptr_prefix='loki_ptr_'): # Ensure no duplication between in/inout/out args inoutargs += tuple(v for v in inargs if v in outargs) inargs = tuple(v for v in inargs if v not in inoutargs) @@ -60,9 +60,9 @@ def dataptr_from_array(self, a: sym.Array): Returns a contiguous pointer :any:`Variable` with types matching the array :data:`a`. 
""" shape = (sym.RangeIndex((None, None)),) * (len(a.shape)+1) - devptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) + dataptr_type = a.type.clone(pointer=True, contiguous=True, shape=shape, intent=None) base_name = a.name if a.parent is None else '_'.join(a.name.split('%')) - return sym.Variable(name=self.ptr_prefix + base_name, type=devptr_type, dimensions=shape) + return sym.Variable(name=self.ptr_prefix + base_name, type=dataptr_type, dimensions=shape) @staticmethod def field_ptr_from_view(field_view): @@ -73,13 +73,15 @@ def field_ptr_from_view(field_view): field_type_name = 'F_' + type_chain[-1] return field_view.parent.get_derived_type_member(field_type_name) + @property + def args(self): + """ A tuple of all argument symbols, concatenating in/inout/out arguments """ + return tuple(chain(*(self.inargs, self.inoutargs, self.outargs))) + @property def dataptrs(self): """ Create a list of contiguous data pointer symbols """ - return tuple(dict.fromkeys( - self.dataptr_from_array(a) - for a in chain(*(self.inargs, self.inoutargs, self.outargs)) - )) + return tuple(dict.fromkeys(self.dataptr_from_array(a) for a in self.args)) @property def host_to_device_calls(self):