diff --git a/loki/backend/maxgen.py b/loki/backend/maxgen.py index 13984d191..2b0f6b9c7 100644 --- a/loki/backend/maxgen.py +++ b/loki/backend/maxgen.py @@ -176,7 +176,7 @@ class extends Kernel { # Class signature if is_manager: - if is_interface: + if is_interface: # pylint: disable=possibly-used-before-assignment header += [self.format_line( 'public interface ', o.name, ' extends ManagerPCIe, ManagerKernel {')] else: diff --git a/loki/dimension.py b/loki/dimension.py index a85a1a662..48103b496 100644 --- a/loki/dimension.py +++ b/loki/dimension.py @@ -34,15 +34,19 @@ class Dimension: bounds_aliases : list or tuple of strings String representations of alternative bounds variables that are used to define loop ranges. + index_aliases : list or tuple of strings + String representations of alternative loop index variables associated + with this dimension. """ def __init__(self, name=None, index=None, bounds=None, size=None, aliases=None, - bounds_aliases=None): + bounds_aliases=None, index_aliases=None): self.name = name self._index = index self._bounds = as_tuple(bounds) self._size = size self._aliases = as_tuple(aliases) + self._index_aliases = as_tuple(index_aliases) if bounds_aliases: if len(bounds_aliases) != 2: @@ -118,3 +122,15 @@ def bounds_expressions(self): exprs = [expr + (b,) for expr, b in zip(exprs, self._bounds_aliases)] return as_tuple(exprs) + + @property + def index_expressions(self): + """ + A list of all expression strings representing the index expression of an iteration space (loop). + """ + + exprs = [self.index,] + if self._index_aliases: + exprs += list(self._index_aliases) + + return as_tuple(exprs) diff --git a/loki/frontend/fparser.py b/loki/frontend/fparser.py index ff939ded5..65729ed00 100644 --- a/loki/frontend/fparser.py +++ b/loki/frontend/fparser.py @@ -685,6 +685,7 @@ def visit_Char_Selector(self, o, **kwargs): * some scalar expression for the kind """ length = None + kind = None if o.children[0] is not None: length = self.visit(o.children[0], **kwargs) if o.children[1] is not None: diff --git a/loki/program_unit.py b/loki/program_unit.py index f73063020..eb2c2650f 100644 --- a/loki/program_unit.py +++ b/loki/program_unit.py @@ -164,7 +164,7 @@ def from_source(cls, source, definitions=None, preprocess=False, if frontend == Frontend.OFP: ast = parse_ofp_source(source) return cls.from_ofp(ast=ast, raw_source=source, definitions=definitions, - pp_info=pp_info, parent=parent) + pp_info=pp_info, parent=parent) # pylint: disable=possibly-used-before-assignment if frontend == Frontend.FP: ast = parse_fparser_source(source) @@ -361,6 +361,13 @@ def enrich(self, definitions, recurse=False): updated_symbol_attrs[local_name] = symbol.type.clone( dtype=remote_node.dtype, imported=True, module=module ) + # Update dtype for local variables using this type + variables_with_this_type = { + name: type_.clone(dtype=remote_node.dtype) + for name, type_ in self.symbol_attrs.items() + if getattr(type_.dtype, 'name') == remote_node.dtype.name + } + updated_symbol_attrs.update(variables_with_this_type) elif hasattr(remote_node, 'type'): # This is a global variable or interface import updated_symbol_attrs[local_name] = remote_node.type.clone( diff --git a/loki/tests/test_subroutine.py b/loki/tests/test_subroutine.py index d055bcd01..6593c0d0c 100644 --- a/loki/tests/test_subroutine.py +++ b/loki/tests/test_subroutine.py @@ -12,15 +12,16 @@ from loki import ( Sourcefile, Module, Subroutine, FindVariables, FindNodes, Section, - CallStatement, BasicType, Array, Scalar, Variable, + Array, Scalar, Variable, SymbolAttributes, StringLiteral, fgen, fexprgen, VariableDeclaration, Transformer, FindTypedSymbols, - ProcedureSymbol, ProcedureType, StatementFunction, - normalize_range_indexing, DeferredTypeSymbol, Assignment, - Interface + ProcedureSymbol, StatementFunction, + normalize_range_indexing, DeferredTypeSymbol ) from loki.build import jit_compile, jit_compile_lib, clean_test from loki.frontend import available_frontends, OFP, OMNI, REGEX +from loki.types import BasicType, DerivedType, ProcedureType +from loki.ir import nodes as ir @pytest.fixture(scope='module', name='here') @@ -767,7 +768,7 @@ def test_routine_call_arrays(header_path, frontend): """ header = Sourcefile.from_file(header_path, frontend=frontend)['header'] routine = Subroutine.from_source(fcode, frontend=frontend, definitions=header) - call = FindNodes(CallStatement).visit(routine.body)[0] + call = FindNodes(ir.CallStatement).visit(routine.body)[0] assert str(call.arguments[0]) == 'x' assert str(call.arguments[1]) == 'y' @@ -797,7 +798,7 @@ def test_call_no_arg(frontend): call abort end subroutine routine_call_no_arg """) - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].arguments == () assert calls[0].kwarguments == () @@ -813,7 +814,7 @@ def test_call_kwargs(frontend): call mpl_init(kprocs=kprocs, cdstring='routine_call_kwargs') end subroutine routine_call_kwargs """) - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].name == 'mpl_init' @@ -838,7 +839,7 @@ def test_call_args_kwargs(frontend): call mpl_send(pbuf, ktag, kdest, cdstring='routine_call_args_kwargs') end subroutine routine_call_args_kwargs """) - calls = FindNodes(CallStatement).visit(routine.body) + calls = FindNodes(ir.CallStatement).visit(routine.body) assert len(calls) == 1 assert calls[0].name == 'mpl_send' assert len(calls[0].arguments) == 3 @@ -1520,7 +1521,7 @@ def test_subroutine_stmt_func(here, frontend): routine.name += f'_{frontend!s}' # Make sure the statement function injection doesn't invalidate source - for assignment in FindNodes(Assignment).visit(routine.body): + for assignment in FindNodes(ir.Assignment).visit(routine.body): assert assignment.source is not None # OMNI inlines statement functions, so we can only check correct representation @@ -1958,7 +1959,7 @@ def test_subroutine_clone_contained(frontend): kernels = driver.subroutines def _verify_call_enrichment(driver_, kernels_): - calls = FindNodes(CallStatement).visit(driver_.body) + calls = FindNodes(ir.CallStatement).visit(driver_.body) assert len(calls) == 2 for call in calls: @@ -2048,12 +2049,12 @@ def test_enrich_explicit_interface(frontend): driver.enrich(kernel) # check if call is enriched correctly - calls = FindNodes(CallStatement).visit(driver.body) + calls = FindNodes(ir.CallStatement).visit(driver.body) assert calls[0].routine is kernel # check if the procedure symbol in the interface block has been removed from # driver's symbol table - intfs = FindNodes(Interface).visit(driver.spec) + intfs = FindNodes(ir.Interface).visit(driver.spec) assert not intfs[0].body[0].parent # check that call still points to correct subroutine @@ -2065,6 +2066,64 @@ def test_enrich_explicit_interface(frontend): assert calls[0].routine is kernel +@pytest.mark.parametrize('frontend', available_frontends()) +def test_enrich_derived_types(tmp_path, frontend): + fcode = """ +subroutine enrich_derived_types_routine(yda_array) +use field_array_module, only : field_3rb_array +implicit none +type(field_3rb_array), intent(inout) :: yda_array +yda_array%p = 0. +end subroutine enrich_derived_types_routine + """.strip() + + fcode_module = """ +module field_array_module +implicit none +type field_3rb_array + real, pointer :: p(:,:,:) +end type field_3rb_array +end module field_array_module + """.strip() + + module = Module.from_source(fcode_module, frontend=frontend, xmods=[tmp_path]) + routine = Subroutine.from_source(fcode, frontend=frontend, xmods=[tmp_path]) + + # The derived type is a dangling import + field_3rb_symbol = routine.symbol_map['field_3rb_array'] + assert field_3rb_symbol.type.imported + assert field_3rb_symbol.type.module is None + assert field_3rb_symbol.type.dtype is BasicType.DEFERRED + + # The variable type is recognized as a derived type but without enrichment + yda_array = routine.variable_map['yda_array'] + assert isinstance(yda_array.type.dtype, DerivedType) + assert routine.variable_map['yda_array'].type.dtype.typedef is BasicType.DEFERRED + + # The pointer member has no type information + yda_array_p = routine.resolve_typebound_var('yda_array%p') + assert yda_array_p.type.dtype is BasicType.DEFERRED + assert yda_array_p.type.shape is None + + # Pick out the typedef (before enrichment to validate object consistency) + field_3rb_tdef = module['field_3rb_array'] + assert isinstance(field_3rb_tdef, ir.TypeDef) + + # Enrich the routine with module definitions + routine.enrich(module) + + # Ensure the imported type symbol is correctly enriched + assert field_3rb_symbol.type.imported + assert field_3rb_symbol.type.module is module + assert isinstance(field_3rb_symbol.type.dtype, DerivedType) + + # Ensure the information has been propagated to other variables + assert isinstance(yda_array.type.dtype, DerivedType) + assert yda_array.type.dtype.typedef is field_3rb_tdef + assert yda_array_p.type.dtype is BasicType.REAL + assert yda_array_p.type.shape == (':', ':', ':') + + @pytest.mark.parametrize('frontend', available_frontends( xfail=[(OMNI, 'OMNI cannot handle external type defs without source')] )) @@ -2099,15 +2158,15 @@ def test_subroutine_deep_clone(frontend): # Replace all assignments with dummy calls map_nodes={} - for assign in FindNodes(Assignment).visit(new_routine.body): - map_nodes[assign] = CallStatement( + for assign in FindNodes(ir.Assignment).visit(new_routine.body): + map_nodes[assign] = ir.CallStatement( name=DeferredTypeSymbol(name='testcall'), arguments=(assign.lhs,), scope=new_routine ) new_routine.body = Transformer(map_nodes).visit(new_routine.body) # Ensure that the original copy of the routine remains unaffected - assert len(FindNodes(Assignment).visit(routine.body)) == 3 - assert len(FindNodes(Assignment).visit(new_routine.body)) == 0 + assert len(FindNodes(ir.Assignment).visit(routine.body)) == 3 + assert len(FindNodes(ir.Assignment).visit(new_routine.body)) == 0 @pytest.mark.parametrize('frontend', available_frontends()) def test_call_args_kwargs_conversion(frontend): @@ -2162,20 +2221,20 @@ def test_call_args_kwargs_conversion(frontend): len_kwargs = (0, 7, 7, 2) # sort kwargs - for i_call, call in enumerate(FindNodes(CallStatement).visit(driver.body)): + for i_call, call in enumerate(FindNodes(ir.CallStatement).visit(driver.body)): assert call.check_kwarguments_order() == kwargs_in_order[i_call] call.sort_kwarguments() # check calls with sorted kwargs - for i_call, call in enumerate(FindNodes(CallStatement).visit(driver.body)): + for i_call, call in enumerate(FindNodes(ir.CallStatement).visit(driver.body)): assert tuple(arg[1].name for arg in call.arg_iter()) == call_args assert len(call.kwarguments) == len_kwargs[i_call] # kwarg to arg conversion - for call in FindNodes(CallStatement).visit(driver.body): + for call in FindNodes(ir.CallStatement).visit(driver.body): call.convert_kwargs_to_args() # check calls with kwargs converted to args - for call in FindNodes(CallStatement).visit(driver.body): + for call in FindNodes(ir.CallStatement).visit(driver.body): assert tuple(arg.name for arg in call.arguments) == call_args assert call.kwarguments == () diff --git a/loki/transformations/__init__.py b/loki/transformations/__init__.py index 6d4a8f863..46fced6f9 100644 --- a/loki/transformations/__init__.py +++ b/loki/transformations/__init__.py @@ -30,3 +30,4 @@ from loki.transformations.transform_region import * # noqa from loki.transformations.pool_allocator import * # noqa from loki.transformations.utilities import * # noqa +from loki.transformations.block_index_transformations import * # noqa diff --git a/loki/transformations/block_index_transformations.py b/loki/transformations/block_index_transformations.py new file mode 100644 index 000000000..812ccd079 --- /dev/null +++ b/loki/transformations/block_index_transformations.py @@ -0,0 +1,416 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from loki.batch import Transformation, ProcedureItem +from loki.ir import nodes as ir, FindNodes +from loki.module import Module +from loki.tools import as_tuple +from loki.types import SymbolAttributes, BasicType +from loki.expression import Variable, Array, RangeIndex, FindVariables, SubstituteExpressions +from loki.transformations.sanitise import resolve_associates +from loki.transformations.utilities import recursive_expression_map_update +from loki.transformations.single_column.base import SCCBaseTransformation + +__all__ = ['BlockViewToFieldViewTransformation', 'InjectBlockIndexTransformation'] + +class BlockViewToFieldViewTransformation(Transformation): + """ + A very IFS-specific transformation to replace per-block, i.e. per OpenMP-thread, view pointers with per-field + view pointers. It should be noted that this transformation only replaces the view pointers but does not actually + insert the block index into the promoted view pointers. Therefore this transformation must always be followed by + the :any:`InjectBlockIndexTransformation`. + + For example, the following code: + + .. code-block:: fortran + + do jlon=1,nproma + mystruct%p(jlon,:) = 0. + enddo + + is transformed to: + + .. code-block:: fortran + + do jlon=1,nproma + mystruct%p_field(jlon,:) = 0. + enddo + + As the rank of ``my_struct%p_field`` is one greater than that of ``my_struct%p``, we would need to also apply + the :any:`InjectBlockIndexTransformation` to obtain semantically correct code: + + .. code-block:: fortran + + do jlon=1,nproma + mystruct%p_field(jlon,:,ibl) = 0. + enddo + + Specific arrays in individual routines can also be marked for exclusion from this transformation by assigning + them to the `exclude_arrays` list in the :any:`SchedulerConfig`. + + This transformation also creates minimal definitions of FIELD API wrappers (i.e. FIELD_RANKSUFF_ARRAY) and + uses them to enrich the :any:`DataType` of relevant variable declarations and expression nodes. This is + required because FIELD API can be built independently of library targets Loki would typically operate on. + + Parameters + ---------- + horizontal : :any:`Dimension` + :any:`Dimension` object describing the variable conventions used in code + to define the horizontal data dimension and iteration space. + global_gfl_ptr: bool + Toggle whether thread-local gfl_ptr should be replaced with global. + key : str, optional + Specify a different identifier under which trafo_data is stored + """ + + _key = 'BlockViewToFieldViewTransformation' + """Identifier for trafo_data entry""" + + item_filter = (ProcedureItem,) + + def __init__(self, horizontal, global_gfl_ptr=False): + self.horizontal = horizontal + self.global_gfl_ptr = global_gfl_ptr + + def transform_subroutine(self, routine, **kwargs): + + if not (item := kwargs.get('item', None)): + raise RuntimeError('Cannot apply BlockViewToFieldViewTransformation without item to store definitions') + successors = kwargs.get('successors', ()) + + role = kwargs['role'] + targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets', None))) + + exclude_arrays = item.config.get('exclude_arrays', []) + + if role == 'kernel': + self.process_kernel(routine, item, successors, targets, exclude_arrays) + if role == 'driver': + self.process_driver(routine, successors) + + @staticmethod + def _get_parkind_suffix(_type): + return _type.rsplit('_')[1][1:3] + + def _build_parkind_import(self, field_array_module, wrapper_types): + + deferred_type = SymbolAttributes(BasicType.DEFERRED, imported=True) + _vars = {Variable(name='JP' + self._get_parkind_suffix(t), type=deferred_type, scope=field_array_module) + for t in wrapper_types} + + return ir.Import(module='PARKIND1', symbols=as_tuple(_vars)) + + def _build_field_array_types(self, field_array_module, wrapper_types): + """ + Build FIELD_RANKSUFF_ARRAY type-definitions. + """ + + typedefs = () + for _type in wrapper_types: + suff = self._get_parkind_suffix(_type) + kind = field_array_module.symbol_map['JP' + suff] + rank = int(_type.rsplit('_')[1][0]) + + view_shape = (RangeIndex(children=(None, None)),) * (rank - 1) + array_shape = (RangeIndex(children=(None, None)),) * rank + + if suff == 'IM': + basetype = BasicType.INTEGER + elif suff == 'LM': + basetype = BasicType.LOGICAL + else: + basetype = BasicType.REAL + + pointer_type = SymbolAttributes(basetype, pointer=True, kind=kind, shape=view_shape) + contig_pointer_type = pointer_type.clone(contiguous=True, shape=array_shape) + + pointer_var = Variable(name='P', type=pointer_type, dimensions=view_shape) + contig_pointer_var = pointer_var.clone(name='P_FIELD', type=contig_pointer_type, dimensions=array_shape) # pylint: disable=no-member + + decls = (ir.VariableDeclaration(symbols=(pointer_var,)),) + decls += (ir.VariableDeclaration(symbols=(contig_pointer_var,)),) + + typedefs += (ir.TypeDef(name=_type, body=decls, parent=field_array_module),) # pylint: disable=unexpected-keyword-arg + + return typedefs + + def _create_dummy_field_api_defs(self, field_array_mod_imports): + """ + Create dummy definitions for FIELD_API wrapper-types to enrich typedefs. + """ + + wrapper_types = {sym.name for imp in field_array_mod_imports for sym in imp.symbols} + + # create dummy module with empty spec + field_array_module = Module(name='FIELD_ARRAY_MODULE', spec=ir.Section(body=())) + + # build parkind1 import + parkind_import = self._build_parkind_import(field_array_module, wrapper_types) + field_array_module.spec.append(parkind_import) + + # build dummy type definitions + typedefs = self._build_field_array_types(field_array_module, wrapper_types) + field_array_module.spec.append(typedefs) + + return [field_array_module,] + + @staticmethod + def propagate_defs_to_children(key, definitions, successors): + """ + Enrich all successors with the dummy FIELD_API definitions. + """ + + for child in successors: + child.ir.enrich(definitions) + child.trafo_data.update({key: {'definitions': definitions}}) + + def process_driver(self, routine, successors): + + # create dummy definitions for field_api wrapper types + field_array_mod_imports = [imp for imp in routine.imports if imp.module.lower() == 'field_array_module'] + definitions = [] + if field_array_mod_imports: + definitions += self._create_dummy_field_api_defs(field_array_mod_imports) + + # propagate dummy field_api wrapper definitions to children + self.propagate_defs_to_children(self._key, definitions, successors) + + #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer + + def build_ydvars_global_gfl_ptr(self, var): + """Replace accesses to thread-local ``YDVARS%GFL_PTR`` with global ``YDVARS%GFL_PTR_G``.""" + + if (parent := var.parent): + parent = self.build_ydvars_global_gfl_ptr(parent) + + _type = var.type + if 'gfl_ptr' in var.basename.lower(): + _type = parent.variable_map['gfl_ptr_g'].type + + return var.clone(name=var.name.upper().replace('GFL_PTR', 'GFL_PTR_G'), + parent=parent, type=_type) + + def process_body(self, body, definitions, successors, targets, exclude_arrays): + + # build list of type-bound array access using the horizontal index + _vars = [var for var in FindVariables(unique=False).visit(body) + if isinstance(var, Array) and var.parents and self.horizontal.index in var.dimensions] + + # build list of type-bound view pointers passed as subroutine arguments + for call in FindNodes(ir.CallStatement).visit(body): + if call.name in targets: + _args = {a: d for d, a in call.arg_map.items() if isinstance(d, Array)} + _vars += [a for a, d in _args.items() + if any(v in d.shape for v in self.horizontal.size_expressions) and a.parents] + + # replace per-block view pointers with full field pointers + vmap = {var: var.clone(name=var.name_parts[-1] + '_FIELD', + type=var.parent.variable_map[var.name_parts[-1] + '_FIELD'].type) + for var in _vars} + + # replace thread-private GFL_PTR with global + if self.global_gfl_ptr: + vmap.update({v: self.build_ydvars_global_gfl_ptr(vmap.get(v, v)) + for v in FindVariables(unique=False).visit(body) if 'ydvars%gfl_ptr' in v.name.lower()}) + vmap = recursive_expression_map_update(vmap) + + # filter out arrays marked for exclusion + vmap = {k: v for k, v in vmap.items() if not any(e in k for e in exclude_arrays)} + + # propagate dummy field_api wrapper definitions to children + self.propagate_defs_to_children(self._key, definitions, successors) + + # finally we perform the substitution + return SubstituteExpressions(vmap).visit(body) + + + def process_kernel(self, routine, item, successors, targets, exclude_arrays): + + # Sanitize the subroutine + resolve_associates(routine) + v_index = SCCBaseTransformation.get_integer_variable(routine, name=self.horizontal.index) + SCCBaseTransformation.resolve_masked_stmts(routine, loop_variable=v_index) + + # Bail if routine is marked as sequential or routine has already been processed + if SCCBaseTransformation.check_routine_pragmas(routine, directive=None): + return + + bounds = SCCBaseTransformation.get_horizontal_loop_bounds(routine, self.horizontal) + SCCBaseTransformation.resolve_vector_dimension(routine, loop_variable=v_index, bounds=bounds) + + # for kernels we process the entire body + routine.body = self.process_body(routine.body, item.trafo_data[self._key]['definitions'], + successors, targets, exclude_arrays) + + +class InjectBlockIndexTransformation(Transformation): + """ + A transformation pass to inject the block-index in arrays promoted by a previous transformation pass. As such, + this transformation also relies on the block-index, or a known alias, being *already* present in routines that + are to be transformed. + + For array access in a :any:`Subroutine` body, it operates by comparing the local shape of an array with its + declared shape. If the local shape is of rank one less than the declared shape, then the block-index is appended + to the array's dimensions. + + For :any:`CallStatement` arguments, if the rank of the argument is one less than that of the corresponding + dummy-argument, the block-index is appended to the argument's dimensions. It should be noted that this logic relies + on the :any:`CallStatement` being free of any sequence-association. + + For example, the following code: + + .. code-block:: fortran + + subroutine kernel1(nblks, ...) + ... + integer, intent(in) :: nblks + integer :: ibl + real :: var(jlon,nlev,nblks) + + do ibl=1,nblks + do jlon=1,nproma + var(jlon,:) = 0. + enddo + + call kernel2(var,...) + enddo + ... + end subroutine kernel1 + + subroutine kernel2(var, ...) + ... + real :: var(jlon,nlev) + end subroutine kernel2 + + is transformed to: + + .. code-block:: fortran + + subroutine kernel1(nblks, ...) + ... + integer, intent(in) :: nblks + integer :: ibl + real :: var(jlon,nlev,nblks) + + do ibl=1,nblks + do jlon=1,nproma + var(jlon,:,ibl) = 0. + enddo + + call kernel2(var(:,:,ibl),...) + enddo + ... + end subroutine kernel1 + + subroutine kernel2(var, ...) + ... + real :: var(jlon,nlev) + end subroutine kernel2 + + Specific arrays in individual routines can also be marked for exclusion from this transformation by assigning + them to the `exclude_arrays` list in the :any:`SchedulerConfig`. + + Parameters + ---------- + block_dim : :any:`Dimension` + :any:`Dimension` object describing the variable conventions used in code + to define the blocking data dimension and iteration space. + key : str, optional + Specify a different identifier under which trafo_data is stored + """ + + # This trafo only operates on procedures + item_filter = (ProcedureItem,) + + def __init__(self, block_dim): + self.block_dim = block_dim + + def transform_subroutine(self, routine, **kwargs): + + role = kwargs['role'] + targets = tuple(str(t).lower() for t in as_tuple(kwargs.get('targets', None))) + + exclude_arrays = [] + if (item := kwargs.get('item', None)): + exclude_arrays = item.config.get('exclude_arrays', []) + + if role == 'kernel': + self.process_kernel(routine, targets, exclude_arrays) + + #TODO: we also need to process any code inside a loki/acdc parallel pragma at the driver layer + + @staticmethod + def get_call_arg_rank(arg): + """ + Utility to retrieve the local rank of a :any:`CallStatement` argument. + """ + + rank = len(getattr(arg, 'shape', ())) + if getattr(arg, 'dimensions', None): + # We assume here that the callstatement is free of sequence association + rank = rank - len([d for d in arg.dimensions if not isinstance(d, RangeIndex)]) + + return rank + + def get_block_index(self, routine): + """ + Utility to retrieve the block-index loop induction variable. + """ + + variable_map = routine.variable_map + if (block_index := variable_map.get(self.block_dim.index, None)): + return block_index + if (block_index := [i for i in self.block_dim.index_expressions + if i.split('%', maxsplit=1)[0] in variable_map]): + return routine.resolve_typebound_var(block_index[0], variable_map) + return None + + def process_body(self, body, block_index, targets, exclude_arrays): + # The logic for callstatement args differs from other variables in the body, + # so we build a list to filter + call_args = [a for call in FindNodes(ir.CallStatement).visit(body) for a in call.arg_map.values()] + + # First get rank mismatched call statement args + vmap = {} + for call in FindNodes(ir.CallStatement).visit(body): + if call.name in targets: + for dummy, arg in call.arg_map.items(): + arg_rank = self.get_call_arg_rank(arg) + dummy_rank = len(getattr(dummy, 'shape', ())) + if arg_rank - 1 == dummy_rank: + dimensions = getattr(arg, 'dimensions', None) or ((RangeIndex((None, None)),) * (arg_rank - 1)) + vmap.update({arg: arg.clone(dimensions=dimensions + as_tuple(block_index))}) + + # Now get the rest of the variables + for var in FindVariables(unique=False).visit(body): + if getattr(var, 'dimensions', None) and not var in call_args: + + local_rank = len(var.dimensions) + decl_rank = local_rank + # we assume here that all derived-type components we wish to transform + # have been parsed + if getattr(var, 'shape', None): + decl_rank = len(var.shape) + + if local_rank == decl_rank - 1: + dimensions = getattr(var, 'dimensions', None) or ((RangeIndex((None, None)),) * (decl_rank - 1)) + vmap.update({var: var.clone(dimensions=dimensions + as_tuple(block_index))}) + + # filter out arrays marked for exclusion + vmap = {k: v for k, v in vmap.items() if not any(e in k for e in exclude_arrays)} + + # finally we perform the substitution + return SubstituteExpressions(vmap).visit(body) + + def process_kernel(self, routine, targets, exclude_arrays): + + # we skip routines that do not contain the block index or any known alias + if not (block_index := self.get_block_index(routine)): + return + + # for kernels we process the entire subroutine body + routine.body = self.process_body(routine.body, block_index, targets, exclude_arrays) diff --git a/loki/transformations/pool_allocator.py b/loki/transformations/pool_allocator.py index 83116599b..6916c83c6 100644 --- a/loki/transformations/pool_allocator.py +++ b/loki/transformations/pool_allocator.py @@ -570,7 +570,7 @@ def _get_c_sizeof_arg(self, arr): elif arr.type.dtype == BasicType.COMPLEX: param = Cast(name='CMPLX', expression=(IntLiteral(1), IntLiteral(1))) - param.kind = getattr(arr.type, 'kind', None) + param.kind = getattr(arr.type, 'kind', None) # pylint: disable=possibly-used-before-assignment return param diff --git a/loki/transformations/single_column/scc_cuf.py b/loki/transformations/single_column/scc_cuf.py index 14fedd35b..8a54132a9 100644 --- a/loki/transformations/single_column/scc_cuf.py +++ b/loki/transformations/single_column/scc_cuf.py @@ -724,7 +724,7 @@ def transform_subroutine(self, routine, **kwargs): remove_pragmas(routine) single_variable_declaration(routine=routine, group_by_shape=True) - device_subroutine_prefix(routine, depth) + device_subroutine_prefix(routine, depth) # pylint: disable=possibly-used-before-assignment routine.spec.prepend(ir.Import(module="cudafor")) diff --git a/loki/transformations/tests/test_block_index_inject.py b/loki/transformations/tests/test_block_index_inject.py new file mode 100644 index 000000000..26ded84ad --- /dev/null +++ b/loki/transformations/tests/test_block_index_inject.py @@ -0,0 +1,386 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from shutil import rmtree +import pytest + +from loki import ( + Dimension, gettempdir, Scheduler, OMNI, FindNodes, Assignment, FindVariables, CallStatement, Subroutine, + Item, available_frontends +) +from loki.transformations import BlockViewToFieldViewTransformation, InjectBlockIndexTransformation + +@pytest.fixture(scope='module', name='horizontal') +def fixture_horizontal(): + return Dimension(name='horizontal', size='nlon', index='jl', bounds=('start', 'end'), + aliases=('nproma',), bounds_aliases=('bnds%start', 'bnds%end')) + + +@pytest.fixture(scope='module', name='blocking') +def fixture_blocking(): + return Dimension(name='blocking', size='nb', index='ibl', index_aliases='bnds%kbl') + + +@pytest.fixture(scope='function', name='config') +def fixture_config(): + """ + Default configuration dict with basic options. + """ + return { + 'default': { + 'mode': 'idem', + 'role': 'kernel', + 'expand': True, + 'strict': True, + 'enable_imports': True, + 'disable': ['*%init', '*%final'] + }, + } + + +@pytest.fixture(scope='module', name='blockview_to_fieldview_code', params=[True, False]) +def fixture_blockview_to_fieldview_code(request): + fcode = { + #------------- + 'variable_mod': ( + #------------- +""" +module variable_mod + implicit none + + type variable_3d + real, pointer :: p(:,:) => null() + real, pointer :: p_field(:,:,:) => null() + end type variable_3d + + type variable_3d_ptr + integer :: comp + type(variable_3d), pointer :: ptr => null() + end type variable_3d_ptr + +end module variable_mod +""" + ).strip(), + #-------------------- + 'field_variables_mod': ( + #-------------------- +""" +module field_variables_mod + use variable_mod, only: variable_3d, variable_3d_ptr + implicit none + + type field_variables + type(variable_3d_ptr), allocatable :: gfl_ptr_g(:) + type(variable_3d_ptr), pointer :: gfl_ptr(:) => null() + type(variable_3d) :: var + end type field_variables + +end module field_variables_mod +""" + ).strip(), + #------------------- + 'container_type_mod': ( + #------------------- +""" +module container_type_mod + implicit none + + type container_3d_var + real, pointer :: p(:,:) => null() + real, pointer :: p_field(:,:,:) => null() + end type container_3d_var + + type container_type + type(container_3d_var), allocatable :: vars(:) + end type container_type + +end module container_type_mod +""" + ).strip(), + #-------------- + 'dims_type_mod': ( + #-------------- +""" +module dims_type_mod + type dims_type + integer :: start, end, kbl, nb + end type dims_type +end module dims_type_mod +""" + ).strip(), + #------- + 'driver': ( + #------- +f""" +subroutine driver(data, ydvars, container, nlon, nlev, {'start, end, nb' if request.param else 'bnds'}) + use field_array_module, only: field_3rb_array + use container_type_mod, only: container_type + use field_variables_mod, only: field_variables + {'use dims_type_mod, only: dims_type' if not request.param else ''} + implicit none + + #include "kernel.intfb.h" + + real, intent(inout) :: data(:,:,:) + integer, intent(in) :: nlon, nlev + type(field_variables), intent(inout) :: ydvars + type(container_type), intent(inout) :: container + {'integer, intent(in) :: start, end, nb' if request.param else 'type(dims_type), intent(in) :: bnds'} + + integer :: ibl + type(field_3rb_array) :: yla_data + + call yla_data%init(data) + + do ibl=1,{'nb' if request.param else 'bnds%nb'} + {'bnds%kbl = ibl' if not request.param else ''} + call kernel(nlon, nlev, {'start, end, ibl' if request.param else 'bnds'}, ydvars, container, yla_data) + enddo + + call yla_data%final() + +end subroutine driver +""" + ).strip(), + #------- + 'kernel': ( + #------- +f""" +subroutine kernel(nlon, nlev, {'start, end, ibl' if request.param else 'bnds'}, ydvars, container, yla_data) + use field_array_module, only: field_3rb_array + use container_type_mod, only: container_type + use field_variables_mod, only: field_variables + {'use dims_type_mod, only: dims_type' if not request.param else ''} + implicit none + + #include "another_kernel.intfb.h" + + integer, intent(in) :: nlon, nlev + type(field_variables), intent(inout) :: ydvars + type(container_type), intent(inout) :: container + {'integer, intent(in) :: start, end, ibl' if request.param else 'type(dims_type), intent(in) :: bnds'} + type(field_3rb_array), intent(inout) :: yda_data + + integer :: jl, jfld + {'associate(start=>bnds%start, end=>bnds%end, ibl=>bnds%kbl)' if not request.param else ''} + + ydvars%var%p_field(:,:) = 0. !... this should only get the block-index + ydvars%var%p_field(:,:,ibl) = 0. !... this should be untouched + + yda_data%p(start:end,:) = 1 + ydvars%var%p(start:end,:) = 1 + + do jfld=1,size(ydvars%gfl_ptr) + do jl=start,end + ydvars%gfl_ptr(jfld)%ptr%p(jl,:) = yda_data%p(jl,:) + container%vars(ydvars%gfl_ptr(jfld)%comp)%p(jl,:) = 0. + enddo + enddo + + call another_kernel(nlon, nlev, data=yda_data%p) + + {'end associate' if not request.param else ''} +end subroutine kernel +""" + ).strip(), + #------- + 'another_kernel': ( + #------- +""" +subroutine another_kernel(nproma, nlev, data) + implicit none + !... not a sequential routine but still labelling it as one to test the + !... bail-out mechanism + !$loki routine seq + integer, intent(in) :: nproma, nlev + real, intent(inout) :: data(nproma, nlev) +end subroutine another_kernel +""" + ).strip() + } + + workdir = gettempdir()/'test_blockview_to_fieldview' + if workdir.exists(): + rmtree(workdir) + workdir.mkdir() + for name, code in fcode.items(): + (workdir/f'{name}.F90').write_text(code) + + yield workdir, request.param + + rmtree(workdir) + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI fails to import undefined module.')])) +def test_blockview_to_fieldview_pipeline(horizontal, blocking, config, frontend, blockview_to_fieldview_code): + + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler( + paths=(blockview_to_fieldview_code[0],), config=config, seed_routines='driver', frontend=frontend + ) + scheduler.process(BlockViewToFieldViewTransformation(horizontal, global_gfl_ptr=True)) + scheduler.process(InjectBlockIndexTransformation(blocking)) + + kernel = scheduler['#kernel'].ir + aliased_bounds = not blockview_to_fieldview_code[1] + ibl_expr = blocking.index + if aliased_bounds: + ibl_expr = blocking.index_expressions[1] + + assigns = FindNodes(Assignment).visit(kernel.body) + + # check that access pointers for arrays without horizontal index in dimensions were not updated + assert assigns[0].lhs == f'ydvars%var%p_field(:,:,{ibl_expr})' + assert assigns[1].lhs == f'ydvars%var%p_field(:,:,{ibl_expr})' + + # check that vector notation was resolved correctly + assert assigns[2].lhs == f'yda_data%p_field(jl, :, {ibl_expr})' + assert assigns[3].lhs == f'ydvars%var%p_field(jl, :, {ibl_expr})' + + # check thread-local ydvars%gfl_ptr was replaced with its global equivalent + gfl_ptr_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr' in v.name.lower()} + gfl_ptr_g_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr_g' in v.name.lower()} + assert gfl_ptr_g_vars + assert not gfl_ptr_g_vars - gfl_ptr_vars + + assert assigns[4].lhs == f'ydvars%gfl_ptr_g(jfld)%ptr%p_field(jl,:,{ibl_expr})' + assert assigns[4].rhs == f'yda_data%p_field(jl,:,{ibl_expr})' + assert assigns[5].lhs == f'container%vars(ydvars%gfl_ptr_g(jfld)%comp)%p_field(jl,:,{ibl_expr})' + + # check callstatement was updated correctly + call = FindNodes(CallStatement).visit(kernel.body)[0] + assert f'yda_data%p_field(:,:,{ibl_expr})' in call.arg_map.values() + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI fails to import undefined module.')])) +@pytest.mark.parametrize('global_gfl_ptr', [False, True]) +def test_blockview_to_fieldview_only(horizontal, blocking, config, frontend, blockview_to_fieldview_code, + global_gfl_ptr): + + config['routines'] = { + 'driver': {'role': 'driver'} + } + + scheduler = Scheduler( + paths=(blockview_to_fieldview_code[0],), config=config, seed_routines='driver', frontend=frontend + ) + scheduler.process(BlockViewToFieldViewTransformation(horizontal, global_gfl_ptr=global_gfl_ptr)) + + kernel = scheduler['#kernel'].ir + aliased_bounds = not blockview_to_fieldview_code[1] + ibl_expr = blocking.index + if aliased_bounds: + ibl_expr = blocking.index_expressions[1] + + assigns = FindNodes(Assignment).visit(kernel.body) + + # check that access pointers for arrays without horizontal index in dimensions were not updated + assert assigns[0].lhs == 'ydvars%var%p_field(:,:)' + assert assigns[1].lhs == f'ydvars%var%p_field(:,:,{ibl_expr})' + + # check that vector notation was resolved correctly + assert assigns[2].lhs == 'yda_data%p_field(jl, :)' + assert assigns[3].lhs == 'ydvars%var%p_field(jl, :)' + + # check thread-local ydvars%gfl_ptr was replaced with its global equivalent + if global_gfl_ptr: + gfl_ptr_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr' in v.name.lower()} + gfl_ptr_g_vars = {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr_g' in v.name.lower()} + assert gfl_ptr_g_vars + assert not gfl_ptr_g_vars - gfl_ptr_vars + else: + assert not {v for v in FindVariables().visit(kernel.body) if 'ydvars%gfl_ptr_g' in v.name.lower()} + + assert assigns[4].rhs == 'yda_data%p_field(jl,:)' + if global_gfl_ptr: + assert assigns[4].lhs == 'ydvars%gfl_ptr_g(jfld)%ptr%p_field(jl,:)' + assert assigns[5].lhs == 'container%vars(ydvars%gfl_ptr_g(jfld)%comp)%p_field(jl,:)' + else: + assert assigns[4].lhs == 'ydvars%gfl_ptr(jfld)%ptr%p_field(jl,:)' + assert assigns[5].lhs == 'container%vars(ydvars%gfl_ptr(jfld)%comp)%p_field(jl,:)' + + # check callstatement was updated correctly + call = FindNodes(CallStatement).visit(kernel.body)[0] + assert 'yda_data%p_field' in call.arg_map.values() + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI correctly complains about rank mismatch in assignment.')])) +def test_simple_blockindex_inject(blocking, frontend): + fcode = """ +subroutine kernel(nlon,nlev,nb,var) + implicit none + + interface + subroutine compute(nlon,nlev,var) + implicit none + integer, intent(in) :: nlon,nlev + real, intent(inout) :: var(nlon,nlev) + end subroutine compute + end interface + + integer, intent(in) :: nlon,nlev,nb + real, intent(inout) :: var(nlon,nlev,4,nb) !... this dummy arg was potentially promoted by a previous transformation + + integer :: ibl + + do ibl=1,nb !... this loop was potentially lowered by a previous transformation + var(:,:,:) = 0. + call compute(nlon,nlev,var(:,:,1)) + enddo + +end subroutine kernel +""" + + kernel = Subroutine.from_source(fcode, frontend=frontend) + InjectBlockIndexTransformation(blocking).apply(kernel, role='kernel', targets=('compute',)) + + assigns = FindNodes(Assignment).visit(kernel.body) + assert assigns[0].lhs == 'var(:,:,:,ibl)' + + calls = FindNodes(CallStatement).visit(kernel.body) + assert 'var(:,:,1,ibl)' in calls[0].arguments + + +@pytest.mark.parametrize('frontend', available_frontends(xfail=[(OMNI, + 'OMNI complains about undefined type.')])) +def test_blockview_to_fieldview_exception(frontend, horizontal): + fcode = """ +subroutine kernel(nlon,nlev,start,end,var) + implicit none + + interface + subroutine compute(nlon,nlev,var) + implicit none + integer, intent(in) :: nlon,nlev + real, intent(inout) :: var(nlon,nlev) + end subroutine compute + end interface + + integer, intent(in) :: nlon,nlev,start,end + type(wrapped_field) :: var + + call compute(nlon,nlev,var%p) + +end subroutine kernel +""" + + kernel = Subroutine.from_source(fcode, frontend=frontend) + item = Item(name='#kernel', source=kernel) + item.trafo_data['BlockViewToFieldViewTransformation'] = {'definitions': []} + with pytest.raises(KeyError): + BlockViewToFieldViewTransformation(horizontal).apply(kernel, item=item, role='kernel', + targets=('compute',)) + + with pytest.raises(RuntimeError): + BlockViewToFieldViewTransformation(horizontal).apply(kernel, role='kernel', + targets=('compute',)) diff --git a/scripts/loki_transform.py b/scripts/loki_transform.py index f9b09d660..fc4c86ca6 100644 --- a/scripts/loki_transform.py +++ b/scripts/loki_transform.py @@ -183,6 +183,8 @@ def convert( scheduler.process( config.pipelines[mode] ) + mode = mode.replace('-', '_') # Sanitize mode string + # Write out all modified source files into the build directory file_write_trafo = FileWriteTransformation(builddir=build, mode=mode) scheduler.process(transformation=file_write_trafo)