-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from ecmwf-ifs/develop
Merge develop to main
- Loading branch information
Showing
27 changed files
with
487 additions
and
235 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/bash
#
# Run every binary found in build/bin with a single thread and the default
# NPROMA, and fail if any of them returns a non-zero exit status.
#
# Environment:
#   mpi_flag  If set to "--with-mpi", targets that have an MPI-parallel
#             driver are launched via mpirun with two ranks. Any other
#             value (or leaving it unset) runs every target directly.
#
# The script changes into ./build, so it has to be started from the
# directory that contains the build directory.
set -euo pipefail
set -x

# These targets don't have an MPI-parallel driver routine
non_mpi_targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-c)

# These targets currently cause issues and are therefore not tested
skipped_targets=(dwarf-cloudsc-gpu-claw)

# An unset mpi_flag means "no MPI" rather than an abort under `set -u`
mpi_flag=${mpi_flag:-}

exit_code=0
cd build

#
# Run each of the binaries with default NPROMA and validate exit codes
#

# Iterate over the directory with a glob instead of parsing `ls` output;
# nullglob turns an empty or missing bin directory into an empty loop.
shopt -s nullglob
for binary in bin/*
do
  target=${binary##*/}

  # Skip some targets
  if [[ " ${skipped_targets[*]} " == *" $target "* ]]
  then
    continue
  fi

  # Capture the status with `||` so that `set -e` does not abort the script
  # on the first failing target; the remaining targets are still exercised.
  run_status=0
  if [[ "$mpi_flag" == "--with-mpi" && " ${non_mpi_targets[*]} " != *" $target "* ]]
  then
    # Two ranks with one thread each, default NPROMA
    mpirun -np 2 "$binary" 1 100 || run_status=$?
  else
    # Single thread, default NPROMA
    "$binary" 1 100 || run_status=$?
  fi

  if (( run_status != 0 ))
  then
    # Record the failure as a flag: adding up exit statuses could wrap
    # around modulo 256 and make the script report success.
    exit_code=1
    echo "::error::Target $target failed with exit code $run_status"
  fi
done

exit "$exit_code"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash
#
# Verify that build/bin contains exactly the expected set of binaries.
#
# Environment:
#   gpu_flag  If set to "--with-gpu", the GPU variants are expected as well.
#             Any other value (or leaving it unset) expects only the
#             CPU targets.
#
# Exits with 1 if an expected target is missing or if build/bin contains an
# entry that is not in the list of expected targets, 0 otherwise.
set -euo pipefail
set -x

exit_code=0

#
# Build the list of targets
#

targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran)

# ${gpu_flag:-} keeps an unset variable from aborting the script under `set -u`
if [[ "${gpu_flag:-}" == "--with-gpu" ]]
then
  targets+=(dwarf-cloudsc-gpu-claw dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist)
fi

#
# Verify each target exists
#
echo "::debug::Expected targets: ${targets[*]}"

for target in "${targets[@]}"
do
  if [[ ! -f build/bin/$target ]]
  then
    exit_code=1
    echo "::error::Missing target: $target"
  fi
done

#
# Check there aren't any other binaries
#

# Compare names rather than counts: with a pure count comparison one missing
# target plus one unexpected binary would cancel out and go unnoticed.
shopt -s nullglob
found=()
unexpected=()
for path in build/bin/*
do
  name=${path##*/}
  found+=("$name")
  if [[ " ${targets[*]} " != *" $name "* ]]
  then
    unexpected+=("$name")
  fi
done

if (( ${#unexpected[@]} > 0 ))
then
  exit_code=1
  echo "::error::Additional targets found in build/bin"
  echo "::error::Expected targets: ${targets[*]}"
  echo "::error::Found targets: ${found[*]}"
fi

exit "$exit_code"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
name: build

# Controls when the workflow will run
on:
  # Triggers the workflow on push events
  push:
    branches: [ '**' ]
    tags-ignore: [ '**' ]

  # Triggers the workflow on pull request events
  pull_request:

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }}

    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false  # false: try to complete all jobs

      matrix:

        arch:
          - github/ubuntu/gnu/9.3.0

        io_library_flag: ['']  # Switch between Serialbox and HDF5
        # FIXME: serialbox builds are currently disabled until a compatible serialbox version is available to Github actions

        mpi_flag: ['', '--with-mpi']  # Enable MPI-parallel build

        prec_flag: ['', '--single-precision']  # Switch single/double precision

        gpu_flag: ['', '--with-gpu']  # GPU-variants enabled

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
      - uses: actions/checkout@v2

      # Sets-up environment and installs required packages.
      # Plain if-statements are used so that a failed package installation
      # fails this step right away instead of being masked and surfacing
      # later as a confusing build error.
      - name: Environment setup
        run: |
          if [[ "${{ matrix.mpi_flag }}" == "--with-mpi" ]]; then
            # -y: never wait for a confirmation prompt
            sudo apt install -y libopenmpi-dev
          fi
          if [[ "${{ matrix.io_library_flag }}" != "--with-serialbox" ]]; then
            sudo apt install -y libhdf5-dev
          fi

      # Check-out dependencies as part of the bundle creation
      - name: Bundle create
        run: ./cloudsc-bundle create

      # Build the targets
      - name: Bundle build
        run: |
          ./cloudsc-bundle build --retry-verbose \
            --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \
            ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }}

      # Verify targets exist
      - name: Verify targets
        env:
          io_library_flag: ${{ matrix.io_library_flag }}
          gpu_flag: ${{ matrix.gpu_flag }}
        run: .github/scripts/verify-targets.sh

      # Run double-precision targets
      # (Mind the exclusions inside the script!)
      - name: Run targets
        env:
          mpi_flag: ${{ matrix.mpi_flag }}
        if: ${{ matrix.prec_flag == '' }}
        run: .github/scripts/run-targets.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
*~ | ||
ecbundle | ||
ecbundle/* | ||
build/* | ||
source/* | ||
serialbox2hdf5/venv | ||
serialbox2hdf5/serialbox |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,8 @@ | ||
# dwarf-p-cloudsc | ||
|
||
[![license](https://img.shields.io/github/license/ecmwf-ifs/dwarf-p-cloudsc)](https://www.apache.org/licenses/LICENSE-2.0.html) | ||
[![build](https://github.com/ecmwf-ifs/dwarf-p-cloudsc/actions/workflows/build.yml/badge.svg)](https://github.com/ecmwf-ifs/dwarf-p-cloudsc/actions/workflows/build.yml) | ||
|
||
`dwarf-p-cloudsc` is intended to test the CLOUDSC cloud microphysics scheme of the IFS. | ||
|
||
*This package is made available to support research collaborations and is not | ||
|
@@ -8,8 +11,8 @@ officially supported by ECMWF* | |
## Contact | ||
|
||
Michael Lange ([email protected]), | ||
Willem Deconinck ([email protected]) | ||
Balthasar Reuter ([email protected]), | ||
Willem Deconinck ([email protected]), | ||
Balthasar Reuter ([email protected]) | ||
|
||
## Licence | ||
|
||
|
@@ -23,11 +26,11 @@ Balthasar Reuter ([email protected]), | |
optimized on the Cray system at ECMWF. | ||
- **dwarf-cloudsc-fortran**: A cleaned up version of the CLOUDSC | ||
prototype that validates runs against platform and language-agnostic | ||
off-line reference data via the Serialbox package. The kernel code | ||
off-line reference data via HDF5 or the Serialbox package. The kernel code | ||
also is slightly cleaner than the original version. | ||
- **dwarf-cloudsc-c**: Standalone C version of the kernel that has | ||
been generated by ECMWF tools. This also requires the serialbox | ||
validation mechanism as above. | ||
been generated by ECMWF tools. This relies exclusively on the Serialbox | ||
validation mechanism. | ||
- **dwarf-cloudsc-gpu-kernels**: GPU-enabled version of the CLOUDSC dwarf | ||
that uses OpenACC and relies on the `!$acc kernels` directive to offload | ||
the computational kernel. | ||
|
@@ -73,7 +76,7 @@ install the bundle via: | |
|
||
```sh | ||
./cloudsc-bundle create # Checks out dependency packages | ||
./cloudsc-bundle build [--build-type=debug|bit|release] [--arch=$PWD/arch/ecmwf/machine/compiler/version/env.sh] | ||
./cloudsc-bundle build [--build-type=debug|bit|release] [--arch=./arch/ecmwf/machine/compiler/version] | ||
``` | ||
|
||
The individual prototype variants of the dwarf are managed as ECBuild features | ||
|
@@ -89,11 +92,12 @@ has proven difficult with certain compiler toolchains. | |
### GPU versions of CLOUDSC | ||
|
||
The GPU-enabled versions of the dwarf are by default disabled. To | ||
enable them use the `--with-gpu` flag. For example: | ||
enable them use the `--with-gpu` flag. For example to build on the in-house | ||
volta machine: | ||
|
||
```sh | ||
./cloudsc-bundle create # Checks out dependency packages | ||
./cloudsc-bundle build --clean --with-gpu --arch=$PWD/arch/ecmwf/volta/pgi-gpu/20.9/env.sh | ||
./cloudsc-bundle build --clean --with-gpu --arch=./arch/ecmwf/volta/nvhpc/20.9 | ||
``` | ||
|
||
### MPI-enabled versions of CLOUDSC | ||
|
@@ -103,7 +107,7 @@ MPI support by providing the `--with-mpi` flag. For example on volta: | |
|
||
```sh | ||
./cloudsc-bundle create | ||
./cloudsc-bundle build --clean --with-mpi --with-gpu --arch=$PWD/arch/ecmwf/volta/pgi-gpu/20.9/env.sh | ||
./cloudsc-bundle build --clean --with-mpi --with-gpu --arch=./arch/ecmwf/volta/nvhpc/20.9 | ||
``` | ||
|
||
Running with MPI parallelization distributes the columns of the working set | ||
|
@@ -112,6 +116,16 @@ each rank. Results are gathered from all ranks and reported for the global | |
working set. Performance numbers are also gathered and reported per thread, | ||
per rank and total. | ||
|
||
**Important:** If the total size of the working set (2nd argument, see | ||
"[Running and testing](#running-and-testing)") **exceeds** the number of | ||
columns in the input file (the input data in the repository consists of just | ||
100 columns), every rank derives its working set by replicating the columns in | ||
the input file, starting with the first column in the file. This means, all | ||
ranks effectively work on the same data set. | ||
If the total size of the working set is **less than or equal** to the number of | ||
columns in the input file, these are truly distributed and every rank ends up | ||
with a different working set. | ||
|
||
When running with multiple GPUs each rank needs to be assigned a different | ||
device. This can be achieved using the `CUDA_VISIBLE_DEVICES` environment | ||
variable: | ||
|
@@ -120,12 +134,19 @@ variable: | |
mpirun -np 2 bash -c "CUDA_VISIBLE_DEVICES=\${OMPI_COMM_WORLD_RANK} bin/dwarf-cloudsc-gpu-claw 1 163840 8192" | ||
``` | ||
|
||
### HDF5 input file support | ||
### Choosing between HDF5 and Serialbox input file format | ||
|
||
The default build configuration relies on HDF5 input and reference data for | ||
dwarf-cloudsc-fortran as well as GPU and Loki versions. The original | ||
dwarf-P-cloudMicrophysics-IFSScheme always uses raw Fortran binary format. | ||
|
||
As an alternative to Serialbox, versions dwarf-cloudsc-fortran as well as GPU | ||
and Loki versions can use HDF5 files for input and reference data. To enable this, | ||
use the `--with-hdf5` flag (note that this disables Serialbox support). | ||
Please note : the hdf5 installation needs to have the f03 interfaces installed. | ||
**Please note:** The HDF5 installation needs to have the f03 interfaces installed. | ||
|
||
As an alternative to HDF5, the [Serialbox](https://github.com/GridTools/serialbox) | ||
library can be used to load input and reference data. This, however, requires | ||
certain boost libraries or its own internal experimental filesystem, both of | ||
which proved difficult on certain compiler toolchains or more exotic hardware | ||
architectures. | ||
|
||
The original input is provided as raw Fortran binary in prototype1, but | ||
input and reference data can be regenerated from this variant by running | ||
|
@@ -136,7 +157,9 @@ CLOUDSC_WRITE_REFERENCE=1 ./bin/dwarf-P-cloudMicrophysics-IFSScheme 1 100 100 | |
``` | ||
|
||
Note that this is only available via Serialbox at the moment. Updates to HDF5 | ||
input or reference data have to be done via manual conversion. | ||
input or reference data have to be done via manual conversion. A small | ||
Python script for this with usage instructions can be found in the | ||
[serialbox2hdf5](serialbox2hdf5/README.md) directory. | ||
|
||
### A64FX version of CLOUDSC | ||
|
||
|
@@ -170,9 +193,9 @@ export OMP_NUM_THREADS=64 | |
OMP_PLACES="{$(seq -s '},{' 0 $(($OMP_NUM_THREADS-1)) )}" srun -q np --ntasks=1 --hint=nomultithread --cpus-per-task=$OMP_NUM_THREADS ./bin/dwarf-cloudsc-fortran $OMP_NUM_THREADS 163840 32 | ||
``` | ||
|
||
For a build with the intel 2021.1.1 compiler, performance of ~74 GF is achieved. | ||
For a build with the Intel 2021.1.1 compiler, performance of ~74 GF is achieved. | ||
|
||
### Loki transformations for CLOUDSC | ||
## Loki transformations for CLOUDSC | ||
|
||
Loki is an in-house developed source-to-source translation tool that | ||
allows us to create bespoke transformations for the IFS to target and | ||
|
@@ -184,7 +207,7 @@ conversion to C and GPU via downstream tools like CLAW. | |
To use the Loki demonstrators, Loki and CLAW need to be installed as | ||
described in the | ||
[Loki install instructions](https://git.ecmwf.int/projects/RDX/repos/loki/browse/INSTALL.md). | ||
_Please note that the in-house "volta" machine needs some manual workarounds for this atm._ | ||
*Please note that the in-house "volta" machine needs some manual workarounds for this atm.* | ||
|
||
Once Loki and CLAW are installed and activated via `source loki-activate`, | ||
the following build flags enable the demonstrator build targets: | ||
|
@@ -193,10 +216,10 @@ the following build flags enable the demonstrator build targets: | |
# For general use on workstations with GNU | ||
# Please note that OpenACC needs to be disabled with GNU, | ||
# since CLAW-generated code currently does not comply with GNU. | ||
./cloudsc-bundle build --clean --with-loki --loki-frontend=fp --cmake="ENABLE_ACC=OFF" --arch=$PWD/arch/ecmwf/leap42/gnu/7.3.0/env.sh | ||
./cloudsc-bundle build --clean --with-loki --loki-frontend=fp --arch=./arch/ecmwf/leap42/gnu/7.3.0 | ||
|
||
# For GPU exploration on volta | ||
./cloudsc-bundle build --clean [--with-gpu]--with-loki --loki-frontend=fp --arch=$PWD/arch/ecmwf/volta/pgi-gpu/20.9/env.sh | ||
./cloudsc-bundle build --clean [--with-gpu] --with-loki --loki-frontend=fp --arch=./arch/ecmwf/volta/nvhpc/20.9 | ||
``` | ||
|
||
The following Loki modes are included in the dwarf, each with a bespoke demonstrator build: | ||
|
@@ -220,7 +243,7 @@ The following Loki modes are included in the dwarf, each with a bespoke demonstr | |
the kernel to C and calls it via iso_c_bindings interfaces from the | ||
driver. | ||
|
||
#### A note on frontends | ||
### A note on frontends | ||
|
||
Loki currently supports three frontends to parse the Fortran source code: | ||
|
||
|
@@ -237,11 +260,3 @@ means we require the `.xmod` module description files for utility | |
routines in `src/common` for processing the CLOUDSC source files with | ||
the OMNI frontend. These are stored in the source under | ||
`src/cloudsc_loki/xmod`. | ||
|
||
#### A note on accuracy in Loki variants | ||
|
||
The original CLOUDSC kernel contains a bug that forces the use of a single | ||
precision constant for an exponential computation. This has been corrected | ||
in the Loki-specific variants, resulting in small deviations in the final | ||
results for some variables against the reference data from the original | ||
version. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
1.1.0 | ||
1.2.0 |
Oops, something went wrong.