Skip to content

Commit

Permalink
Merge pull request #9 from ecmwf-ifs/develop
Browse files Browse the repository at this point in the history
Merge develop to main
  • Loading branch information
mlange05 authored Feb 11, 2022
2 parents 96e2ad1 + 7aaa028 commit 8cc1c8b
Show file tree
Hide file tree
Showing 27 changed files with 487 additions and 235 deletions.
37 changes: 37 additions & 0 deletions .github/scripts/run-targets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
# Runs every binary found in build/bin and exits non-zero if any of them fails.
#
# Environment:
#   mpi_flag  "--with-mpi" launches MPI-capable targets through mpirun;
#             any other value (or unset) runs every target directly.
set -euo pipefail
set -x

# These targets don't have an MPI-parallel driver routine
non_mpi_targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-c)

# These targets currently cause issues and are therefore not tested
skipped_targets=(dwarf-cloudsc-gpu-claw)

exit_code=0
cd build

#
# Run each of the binaries with default NPROMA and validate exit codes
#

# Iterate with a glob instead of parsing `ls`, so file names are never
# word-split or glob-expanded a second time.
for path in bin/*
do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "$path" ]] || continue
  target=${path##*/}

  # Skip some targets
  if [[ " ${skipped_targets[*]} " =~ " $target " ]]
  then
    continue
  fi

  # The `|| { ... }` handlers are required: under `set -e` a bare failing
  # command would abort the script immediately, so the remaining targets would
  # never run and the failure would never be recorded in exit_code.
  if [[ "${mpi_flag:-}" == "--with-mpi" && ! " ${non_mpi_targets[*]} " =~ " $target " ]]
  then
    # Two ranks with one thread each, default NPROMA
    mpirun -np 2 "bin/$target" 1 100 || {
      exit_code=1
      echo "::error::Target failed: $target"
    }
  else
    # Single thread, default NPROMA
    "bin/$target" 1 100 || {
      exit_code=1
      echo "::error::Target failed: $target"
    }
  fi
done

exit $exit_code
44 changes: 44 additions & 0 deletions .github/scripts/verify-targets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
# Verifies that build/bin contains exactly the expected set of binaries.
#
# Environment:
#   gpu_flag  "--with-gpu" adds the GPU variants to the expected targets;
#             any other value (or unset) expects the CPU targets only.
#
# Exits with 1 if an expected target is missing or an unexpected one is found.
set -euo pipefail
set -x

exit_code=0

#
# Build the list of targets
#

targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran)

# Default to empty so a manual run without gpu_flag does not abort under `set -u`
if [[ "${gpu_flag:-}" == "--with-gpu" ]]
then
  targets+=(dwarf-cloudsc-gpu-claw dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist)
fi

#
# Verify each target exists
#
echo "::debug::Expected targets: ${targets[*]}"

for target in "${targets[@]}"
do
  if [[ ! -f build/bin/$target ]]
  then
    exit_code=1
    echo "::error::Missing target: $target"
  fi
done

#
# Check there aren't any other binaries
#

# Compare by name rather than by count: a pure count comparison misses an
# unexpected binary whenever an expected one is missing at the same time.
found=()
unexpected=()
for path in build/bin/*
do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "$path" ]] || continue
  name=${path##*/}
  found+=("$name")
  if [[ ! " ${targets[*]} " =~ " $name " ]]
  then
    unexpected+=("$name")
  fi
done

if [[ ${#unexpected[@]} -gt 0 ]]
then
  exit_code=1
  echo "::error::Additional targets found in build/bin"
  echo "::error::Expected targets: ${targets[*]}"
  echo "::error::Found targets: ${found[*]}"
fi

exit $exit_code
77 changes: 77 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
name: build

# Controls when the workflow will run
on:
  # Triggers the workflow on push events
  push:
    branches: [ '**' ]
    tags-ignore: [ '**' ]

  # Triggers the workflow on pull request events
  pull_request:

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }}

    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false  # false: try to complete all jobs

      matrix:

        arch:
          - github/ubuntu/gnu/9.3.0

        io_library_flag: ['']  # Switch between Serialbox and HDF5
        # FIXME: serialbox builds are currently disabled until a compatible serialbox version is available to Github actions

        mpi_flag: ['', '--with-mpi']  # Enable MPI-parallel build

        prec_flag: ['', '--single-precision']  # Switch single/double precision

        gpu_flag: ['', '--with-gpu']  # GPU-variants enabled

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2

      # Sets-up environment and installs required packages.
      # Explicit `if` blocks are used instead of `cond && install || true`, which
      # would silently hide a failed package installation.
      - name: Environment setup
        run: |
          sudo apt-get update
          if [[ "${{ matrix.mpi_flag }}" == "--with-mpi" ]]; then
            sudo apt-get install -y libopenmpi-dev
          fi
          if [[ "${{ matrix.io_library_flag }}" != "--with-serialbox" ]]; then
            sudo apt-get install -y libhdf5-dev
          fi

      # Check-out dependencies as part of the bundle creation
      - name: Bundle create
        run: ./cloudsc-bundle create

      # Build the targets
      - name: Bundle build
        run: |
          ./cloudsc-bundle build --retry-verbose \
            --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \
            ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }}

      # Verify targets exist
      - name: Verify targets
        env:
          io_library_flag: ${{ matrix.io_library_flag }}
          gpu_flag: ${{ matrix.gpu_flag }}
        run: .github/scripts/verify-targets.sh

      # Run double-precision targets
      # (Mind the exclusions inside the script!)
      - name: Run targets
        env:
          mpi_flag: ${{ matrix.mpi_flag }}
        if: ${{ matrix.prec_flag == '' }}
        run: .github/scripts/run-targets.sh
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
*~
ecbundle
ecbundle/*
build/*
source/*
serialbox2hdf5/venv
serialbox2hdf5/serialbox
26 changes: 14 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ cmake_minimum_required( VERSION 3.12 FATAL_ERROR )
find_package( ecbuild REQUIRED )

# define the project
project(dwarf-P-cloudMicrophysics-IFSScheme LANGUAGES C Fortran )
project( dwarf-p-cloudsc LANGUAGES C Fortran )

include( cmake/compat.cmake )

Expand Down Expand Up @@ -62,20 +62,22 @@ if( MPI_Fortran_FOUND AND ENABLE_MPI )
endif()

### HDF5
if( NOT DEFINED ENABLE_HDF5 OR ENABLE_HDF5 )
ecbuild_find_package( NAME HDF5
COMPONENTS Fortran )
endif()
if( HDF5_FOUND AND ENABLE_HDF5 )
ecbuild_add_option( FEATURE HDF5
DESCRIPTION "HDF5" DEFAULT OFF
CONDITION HDF5_FOUND )
ecbuild_add_option( FEATURE HDF5
DESCRIPTION "Use HDF5 to read input and reference data"
REQUIRED_PACKAGES "HDF5 COMPONENTS Fortran"
CONDITION NOT DEFINED ENABLE_HDF5 OR ENABLE_HDF5
DEFAULT ON )
# Gate the compile definitions on the ecbuild feature flag rather than on the
# raw find result: HAVE_HDF5 is set by ecbuild_add_option( FEATURE HDF5 ... )
# only when the feature is enabled and its required package was found, whereas
# HDF5_FOUND may also be set by an earlier, unrelated find_package() call.
if( HAVE_HDF5 )
  list( APPEND CLOUDSC_DEFINITIONS HAVE_HDF5 ${HDF5_Fortran_DEFINITIONS} )
endif()

# Add Serialbox utility package for platform-agnostic file I/O
find_package( Serialbox )
if( Serialbox_FOUND )
ecbuild_add_option( FEATURE SERIALBOX
DESCRIPTION "Use Serialbox to read input and reference data"
REQUIRED_PACKAGES "Serialbox"
CONDITION (NOT DEFINED ENABLE_SERIALBOX OR ENABLE_SERIALBOX) AND NOT HDF5_FOUND
DEFAULT OFF )
# Gate the compile definition on the ecbuild feature flag: HAVE_SERIALBOX is
# set by ecbuild_add_option( FEATURE SERIALBOX ... ) itself, so it does not
# depend on which spelling of <Package>_FOUND the Serialbox package exports.
if( HAVE_SERIALBOX )
  list( APPEND CLOUDSC_DEFINITIONS HAVE_SERIALBOX )
endif()

Expand Down Expand Up @@ -105,7 +107,7 @@ if(HAVE_DOCS)
endif()

# finalize
ecbuild_install_project(NAME dwarf-P-cloudMicrophysics-IFSScheme)
ecbuild_install_project(NAME dwarf-p-cloudsc)

# print summary
ecbuild_print_summary()
73 changes: 44 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# dwarf-p-cloudsc

[![license](https://img.shields.io/github/license/ecmwf-ifs/dwarf-p-cloudsc)](https://www.apache.org/licenses/LICENSE-2.0.html)
[![build](https://github.com/ecmwf-ifs/dwarf-p-cloudsc/actions/workflows/build.yml/badge.svg)](https://github.com/ecmwf-ifs/dwarf-p-cloudsc/actions/workflows/build.yml)

`dwarf-p-cloudsc` is intended to test the CLOUDSC cloud microphysics scheme of the IFS.

*This package is made available to support research collaborations and is not
Expand All @@ -8,8 +11,8 @@ officially supported by ECMWF*
## Contact

Michael Lange ([email protected]),
Willem Deconinck ([email protected])
Balthasar Reuter ([email protected]),
Willem Deconinck ([email protected]),
Balthasar Reuter ([email protected])

## Licence

Expand All @@ -23,11 +26,11 @@ Balthasar Reuter ([email protected]),
optimized on the Cray system at ECMWF.
- **dwarf-cloudsc-fortran**: A cleaned up version of the CLOUDSC
prototype that validates runs against platform and language-agnostic
off-line reference data via the Serialbox package. The kernel code
off-line reference data via HDF5 or the Serialbox package. The kernel code
also is slightly cleaner than the original version.
- **dwarf-cloudsc-c**: Standalone C version of the kernel that has
been generated by ECMWF tools. This also requires the serialbox
validation mechanism as above.
been generated by ECMWF tools. This relies exclusively on the Serialbox
validation mechanism.
- **dwarf-cloudsc-gpu-kernels**: GPU-enabled version of the CLOUDSC dwarf
that uses OpenACC and relies on the `!$acc kernels` directive to offload
the computational kernel.
Expand Down Expand Up @@ -73,7 +76,7 @@ install the bundle via:

```sh
./cloudsc-bundle create # Checks out dependency packages
./cloudsc-bundle build [--build-type=debug|bit|release] [--arch=$PWD/arch/ecmwf/machine/compiler/version/env.sh]
./cloudsc-bundle build [--build-type=debug|bit|release] [--arch=./arch/ecmwf/machine/compiler/version]
```

The individual prototype variants of the dwarf are managed as ECBuild features
Expand All @@ -89,11 +92,12 @@ has proven difficult with certain compiler toolchains.
### GPU versions of CLOUDSC

The GPU-enabled versions of the dwarf are by default disabled. To
enable them use the `--with-gpu` flag. For example:
enable them, use the `--with-gpu` flag. For example, to build on the in-house
volta machine:

```sh
./cloudsc-bundle create # Checks out dependency packages
./cloudsc-bundle build --clean --with-gpu --arch=$PWD/arch/ecmwf/volta/pgi-gpu/20.9/env.sh
./cloudsc-bundle build --clean --with-gpu --arch=./arch/ecmwf/volta/nvhpc/20.9
```

### MPI-enabled versions of CLOUDSC
Expand All @@ -103,7 +107,7 @@ MPI support by providing the `--with-mpi` flag. For example on volta:

```sh
./cloudsc-bundle create
./cloudsc-bundle build --clean --with-mpi --with-gpu --arch=$PWD/arch/ecmwf/volta/pgi-gpu/20.9/env.sh
./cloudsc-bundle build --clean --with-mpi --with-gpu --arch=./arch/ecmwf/volta/nvhpc/20.9
```

Running with MPI parallelization distributes the columns of the working set
Expand All @@ -112,6 +116,16 @@ each rank. Results are gathered from all ranks and reported for the global
working set. Performance numbers are also gathered and reported per thread,
per rank and total.

**Important:** If the total size of the working set (2nd argument, see
"[Running and testing](#running-and-testing)") **exceeds** the number of
columns in the input file (the input data in the repository consists of just
100 columns), every rank derives its working set by replicating the columns in
the input file, starting with the first column in the file. This means that all
ranks effectively work on the same data set.
If the total size of the working set is **less than or equal** to the number of
columns in the input file, these are truly distributed and every rank ends up
with a different working set.

When running with multiple GPUs each rank needs to be assigned a different
device. This can be achieved using the `CUDA_VISIBLE_DEVICES` environment
variable:
Expand All @@ -120,12 +134,19 @@ variable:
mpirun -np 2 bash -c "CUDA_VISIBLE_DEVICES=\${OMPI_COMM_WORLD_RANK} bin/dwarf-cloudsc-gpu-claw 1 163840 8192"
```

### HDF5 input file support
### Choosing between HDF5 and Serialbox input file format

The default build configuration relies on HDF5 input and reference data for
dwarf-cloudsc-fortran as well as GPU and Loki versions. The original
dwarf-P-cloudMicrophysics-IFSScheme always uses raw Fortran binary format.

As an alternative to Serialbox, versions dwarf-cloudsc-fortran as well as GPU
and Loki versions can use HDF5 files for input and reference data. To enable this,
use the `--with-hdf5` flag (note that this disables Serialbox support).
Please note : the hdf5 installation needs to have the f03 interfaces installed.
**Please note:** The HDF5 installation needs to have the f03 interfaces installed.

As an alternative to HDF5, the [Serialbox](https://github.com/GridTools/serialbox)
library can be used to load input and reference data. This, however, requires
certain boost libraries or its own internal experimental filesystem, both of
which proved difficult on certain compiler toolchains or more exotic hardware
architectures.

The original input is provided as raw Fortran binary in prototype1, but
input and reference data can be regenerated from this variant by running
Expand All @@ -136,7 +157,9 @@ CLOUDSC_WRITE_REFERENCE=1 ./bin/dwarf-P-cloudMicrophysics-IFSScheme 1 100 100
```

Note that this is only available via Serialbox at the moment. Updates to HDF5
input or reference data have to be done via manual conversion.
input or reference data have to be done via manual conversion. A small
Python script for this with usage instructions can be found in the
[serialbox2hdf5](serialbox2hdf5/README.md) directory.

### A64FX version of CLOUDSC

Expand Down Expand Up @@ -170,9 +193,9 @@ export OMP_NUM_THREADS=64
OMP_PLACES="{$(seq -s '},{' 0 $(($OMP_NUM_THREADS-1)) )}" srun -q np --ntasks=1 --hint=nomultithread --cpus-per-task=$OMP_NUM_THREADS ./bin/dwarf-cloudsc-fortran $OMP_NUM_THREADS 163840 32
```

For a build with the intel 2021.1.1 compiler, performance of ~74 GF is achieved.
For a build with the Intel 2021.1.1 compiler, performance of ~74 GF is achieved.

### Loki transformations for CLOUDSC
## Loki transformations for CLOUDSC

Loki is an in-house developed source-to-source translation tool that
allows us to create bespoke transformations for the IFS to target and
Expand All @@ -184,7 +207,7 @@ conversion to C and GPU via downstream tools like CLAW.
To use the Loki demonstrators, Loki and CLAW need to be installed as
described in the
[Loki install instructions](https://git.ecmwf.int/projects/RDX/repos/loki/browse/INSTALL.md).
_Please note that the in-house "volta" machine needs some manual workarounds for this atm._
*Please note that the in-house "volta" machine needs some manual workarounds for this at the moment.*

Once Loki and CLAW are installed and activated via `source loki-activate`,
the following build flags enable the demonstrator build targets:
Expand All @@ -193,10 +216,10 @@ the following build flags enable the demonstrator build targets:
# For general use on workstations with GNU
# Please note that OpenACC needs to be disabled with GNU,
# since CLAW-generated code currently does not comply with GNU.
./cloudsc-bundle build --clean --with-loki --loki-frontend=fp --cmake="ENABLE_ACC=OFF" --arch=$PWD/arch/ecmwf/leap42/gnu/7.3.0/env.sh
./cloudsc-bundle build --clean --with-loki --loki-frontend=fp --arch=./arch/ecmwf/leap42/gnu/7.3.0

# For GPU exploration on volta
./cloudsc-bundle build --clean [--with-gpu]--with-loki --loki-frontend=fp --arch=$PWD/arch/ecmwf/volta/pgi-gpu/20.9/env.sh
./cloudsc-bundle build --clean [--with-gpu] --with-loki --loki-frontend=fp --arch=./arch/ecmwf/volta/nvhpc/20.9
```

The following Loki modes are included in the dwarf, each with a bespoke demonstrator build:
Expand All @@ -220,7 +243,7 @@ The following Loki modes are included in the dwarf, each with a bespoke demonstr
the kernel to C and calls it via iso_c_bindings interfaces from the
driver.

#### A note on frontends
### A note on frontends

Loki currently supports three frontends to parse the Fortran source code:

Expand All @@ -237,11 +260,3 @@ means we require the `.xmod` module description files for utility
routines in `src/common` for processing the CLOUDSC source files with
the OMNI frontend. These are stored in the source under
`src/cloudsc_loki/xmod`.

#### A note on accuracy in Loki variants

The original CLOUDSC kernel contains a bug that forces the use of a single
precision constant for an exponential computation. This has been corrected
in the Loki-specific variants, resulting in small deviations in the final
results for some variables against the reference data from the original
version.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.1.0
1.2.0
Loading

0 comments on commit 8cc1c8b

Please sign in to comment.