Skip to content

Commit

Permalink
Merge pull request #96 from ecmwf-ifs/je-field-api-view-updates
Browse files Browse the repository at this point in the history
Updated Field API variants of the clouds dwarf (CPU and GPU)
  • Loading branch information
reuterbal authored Nov 4, 2024
2 parents 83d22d5 + 128382c commit 3d5c82a
Show file tree
Hide file tree
Showing 19 changed files with 1,154 additions and 344 deletions.
8 changes: 8 additions & 0 deletions .github/scripts/verify-targets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ exit_code=0
#

# Baseline targets, added to the list unconditionally
targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran dwarf-cloudsc-c)
# Add the Field API CPU variant when $build_flags contains "--with-field"
# (substring match, so any flag starting with that text also qualifies)
if [[ "$build_flags" == *"--with-field"* ]]
then
targets+=(dwarf-cloudsc-fortran-field)
fi

if [[ "$build_flags" == *"--with-gpu"* ]]
then
Expand All @@ -18,6 +22,10 @@ then
then
targets+=(dwarf-cloudsc-gpu-claw)
fi
if [[ "$build_flags" == *"--with-field"* ]]
then
targets+=(dwarf-cloudsc-gpu-scc-field)
fi
if [[ "$build_flags" == *"--with-cuda"* ]]
then
targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching)
Expand Down
43 changes: 43 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ jobs:
- '--with-gpu --with-loki --with-atlas' # Enable Loki, Atlas, and GPU variants
- '--with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, Atlas, and GPU variants with MPI
- '--single-precision --with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, and GPU variants with MPI in a single-precision build
- '--with-field' # Enable Field API CPU variant
- '--with-field --with-mpi' # Enable Field API CPU variant with mpi
- '--single-precision --with-field --with-mpi' # Enable Field API CPU variant with mpi and single-precision

pyiface_flag: [''] # Enable the pyiface variant

Expand Down Expand Up @@ -70,6 +73,26 @@ jobs:
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-atlas'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE
- arch: nvhpc/21.9
nvhpc_version: 21.9
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE

- arch: nvhpc/23.5
nvhpc_version: 23.5
Expand All @@ -91,6 +114,26 @@ jobs:
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-atlas'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: ''
build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE
- arch: nvhpc/23.5
nvhpc_version: 23.5
io_library_flag: '--with-serialbox'
build_flags: '--with-gpu --with-loki --with-cuda --with-field --without-mapped-fields'
ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand Down
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- P. Bechtold (ECMWF)
- S. Brdar (ECMWF)
- W. Deconinck (ECMWF)
- J. Ericsson (ECMWF)
- R. Forbes (ECMWF)
- C. Jakob (ECMWF)
- J. Hague (ECMWF)
Expand Down
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,20 @@ endif()
ecbuild_add_option( FEATURE FIELD_API
DESCRIPTION "Use field_api to manage GPU data offload and copyback"
REQUIRED_PACKAGES "field_api"
CONDITION HAVE_CUDA
DEFAULT ON )

ecbuild_find_package( NAME loki )
ecbuild_find_package( NAME atlas )

# Opt-in switch that disables the use of ACC mapped memory in Field API
# objects. The feature can only be enabled when HAVE_FIELD_API,
# field_api_HAVE_ACC and field_api_HAVE_CUDA are all true.
ecbuild_add_option(
    FEATURE FIELD_API_DISABLE_MAPPED_MEMORY
    DEFAULT OFF
    DESCRIPTION "Disable the use of ACC mapped memory in Field API objects"
    CONDITION HAVE_FIELD_API AND field_api_HAVE_ACC AND field_api_HAVE_CUDA
)

# When the feature is active, append the matching definition to
# CLOUDSC_DEFINITIONS
if( HAVE_FIELD_API_DISABLE_MAPPED_MEMORY )
    list( APPEND CLOUDSC_DEFINITIONS FIELD_API_DISABLE_MAPPED_MEMORY )
endif()


# Add option for single-precision builds
ecbuild_add_option( FEATURE SINGLE_PRECISION
DESCRIPTION "Build CLOUDSC in single precision" DEFAULT OFF
Expand Down
23 changes: 16 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Balthasar Reuter ([email protected])
prototype that validates runs against platform and language-agnostic
off-line reference data via HDF5 or the Serialbox package. The kernel code
also is slightly cleaner than the original version.
- **dwarf-cloudsc-fortran-field**: A Fortran version of CLOUDSC that uses Field API
for the data structures. The intent of this version is to show how
Field API is used in newer versions of the IFS.
- **dwarf-cloudsc-c**: Standalone C version of the kernel that has
been generated by ECMWF tools. This relies exclusively on the Serialbox
validation mechanism.
Expand Down Expand Up @@ -81,13 +84,18 @@ Balthasar Reuter ([email protected])
- **dwarf-cloudsc-gpu-scc-field**: GPU-enabled and optimized version of
CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://github.com/ecmwf-ifs/field_api) (a Fortran library purpose-built for IFS data-structures that facilitates the
creation and management of field objects in scientific code) to perform device offload
and copyback. The intent is to demonstrate the explicit use of pinned host memory to speed-up
data transfers, as provided by the shipped prototype implementation, and
investigate the effect of different data storage allocation layouts.
and copyback.
The Field API variant supports modern features of FIELD API such as *field gangs*, which group
multiple fields and allocate them in one larger field in order to reduce allocations and
data transfers. Field gang support can be enabled at runtime by setting the environment
variable `CLOUDSC_PACKED_STORAGE=ON`. If CUDA is available, the Field API variant also supports
allocating fields in pinned memory. This is enabled by setting the
environment variable `CLOUDSC_FIELD_API_PINNED=ON` and will speed up data transfers between host and device.
To enable this variant, a suitable CUDA installation is required and the
`--with-cuda` flag needs to be passed at the build stage. This variant lets the CUDA runtime
manage temporary arrays and needs a large `NV_ACC_CUDA_HEAPSIZE`
(eg. `NV_ACC_CUDA_HEAPSIZE=8GB` for 160K columns.)
manage temporary arrays and needs a large `NV_ACC_CUDA_HEAPSIZE` (e.g. `NV_ACC_CUDA_HEAPSIZE=8GB` for 160K columns).
It is possible to prevent Field API from registering fields in the OpenACC data map by passing the
`--without-mapped-fields` flag at the build stage.
- **cloudsc-pyiface.py**: a combination of the cloudsc/cloudsc-driver routines
of cloudsc-fortran with the uppermost `dwarf` program replaced with a
corresponding Python script capable of HDF5 data load and
Expand Down Expand Up @@ -320,8 +328,9 @@ transfer overheads will dominate timings, and that most supported GPU
variants aim to optimise compute kernel timings only. However, a
dedicated variant `dwarf-cloudsc-gpu-scc-field` has been added to
explore host-side memory pinning, which improves data transfer times
and alternative data layout strategies. By default, this will allocate
each array variable individually in pinned memory. A runtime flag
and alternative data layout strategies. By default, pinned memory is turned off
but can be turned on by setting the environment variable `CLOUDSC_FIELD_API_PINNED=ON`.
This will allocate each array variable individually in pinned memory. A runtime flag
`CLOUDSC_PACKED_STORAGE=ON` can be used to enable "packed" storage,
where multiple arrays are stored in a single base allocation, eg.

Expand Down
20 changes: 16 additions & 4 deletions bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ cmake : >
CMAKE_LINK_DEPENDS_NO_SHARED=ON
CMAKE_EXPORT_COMPILE_COMMANDS=ON
BUILD_serialbox=OFF
BUILD_field_api=OFF
BUILD_eckit=OFF
BUILD_fckit=OFF
BUILD_atlas=OFF
BUILD_field_api=OFF
ENABLE_OMP=ON
ENABLE_CUDA=OFF
ENABLE_SINGLE_PRECISION=OFF
projects :
Expand Down Expand Up @@ -52,10 +53,11 @@ projects :
- field_api :
git : https://github.com/ecmwf-ifs/field_api.git
version : v0.3.0
version : v0.3.3
require : ecbuild
cmake : >
UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module
FIELD_API_ENABLE_ACC=OFF
- fckit :
git : https://github.com/ecmwf/fckit
Expand All @@ -79,7 +81,6 @@ projects :
require : ecbuild serialbox loki field_api

options :

- toolchain :
help : Specify compiler options via supplied toolchain file
cmake : CMAKE_TOOLCHAIN_FILE={{value}}
Expand All @@ -92,9 +93,21 @@ options :
ENABLE_DOUBLE_PRECISION=OFF
FIELD_API_DEFINITIONS=SINGLE
- with-field-api :
help : Enable Field API variants to be built
cmake : >
BUILD_field_api=ON
ENABLE_CLOUDSC_FORTRAN_FIELD=ON
- without-mapped-fields :
help : Disables automatic registering of Field API fields in ACC map.
cmake : >
ENABLE_FIELD_API_DISABLE_MAPPED_MEMORY=ON
- with-gpu :
help : Enable GPU kernels
cmake : >
FIELD_API_ENABLE_ACC=ON
ENABLE_CLOUDSC_GPU_SCC=ON
ENABLE_CLOUDSC_GPU_SCC_HOIST=ON
ENABLE_CLOUDSC_GPU_SCC_K_CACHING=ON
Expand All @@ -106,7 +119,6 @@ options :
ENABLE_CUDA=ON
ENABLE_CLOUDSC_GPU_SCC_CUF=ON
ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON
BUILD_field_api=ON
- with-hip :
help: Enable GPU kernel variant based on HIP
Expand Down
69 changes: 69 additions & 0 deletions src/cloudsc_fortran/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ ecbuild_add_option( FEATURE CLOUDSC_FORTRAN
CONDITION Serialbox_FOUND OR HDF5_FOUND
)

# Define the Field API flavour of the cloudsc Fortran CPU variant.
# It is enabled by default, but only when Field API is available together
# with at least one of Serialbox or HDF5.
ecbuild_add_option(
    FEATURE CLOUDSC_FORTRAN_FIELD
    DEFAULT ON
    DESCRIPTION "Build the field API Fortran version of CLOUDSC using Serialbox"
    CONDITION HAVE_FIELD_API AND (Serialbox_FOUND OR HDF5_FOUND)
)

if( HAVE_CLOUDSC_FORTRAN )

# Define the binary build target for this variant
Expand Down Expand Up @@ -73,3 +79,66 @@ if( HAVE_CLOUDSC_FORTRAN )
CONDITION HAVE_OMP AND HAVE_MPI
)
endif()

if( HAVE_CLOUDSC_FORTRAN_FIELD )

    # Executable for the Field API flavour of the Fortran variant. It is
    # compiled with the shared CLOUDSC definitions plus CLOUDSC_FIELD.
    ecbuild_add_executable(
        TARGET dwarf-cloudsc-fortran-field
        SOURCES
            dwarf_cloudsc.F90
            cloudsc_driver_field_mod.F90
            cloudsc.F90
        LIBS
            cloudsc-common-lib
        DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_FIELD
    )

    # Symlink the input data (at configure time) into the directory three
    # levels above this binary dir, which is also the working directory of
    # the tests registered below.
    if( HAVE_SERIALBOX )
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E create_symlink
                ${CMAKE_CURRENT_SOURCE_DIR}/../../data
                ${CMAKE_CURRENT_BINARY_DIR}/../../../data
        )
    endif()

    if( HAVE_HDF5 )
        # HDF5 input file and the matching reference file
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E create_symlink
                ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5
                ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5
        )
        execute_process(
            COMMAND ${CMAKE_COMMAND} -E create_symlink
                ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5
                ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5
        )
    endif()

    # Four test flavours of the same binary: serial, OpenMP, MPI and
    # MPI+OpenMP. In every case the first program argument equals the OMP
    # value requested from ecbuild.

    # Single thread, no MPI; registered unconditionally
    ecbuild_add_test(
        TARGET dwarf-cloudsc-fortran-field-serial
        COMMAND bin/dwarf-cloudsc-fortran-field
        ARGS 1 100 16
        OMP 1
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
    )

    # Four OpenMP threads; only with OpenMP support
    ecbuild_add_test(
        TARGET dwarf-cloudsc-fortran-field-omp
        COMMAND bin/dwarf-cloudsc-fortran-field
        ARGS 4 100 16
        OMP 4
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
        CONDITION HAVE_OMP
    )

    # Two MPI ranks, one thread each; only with MPI support
    ecbuild_add_test(
        TARGET dwarf-cloudsc-fortran-field-mpi
        COMMAND bin/dwarf-cloudsc-fortran-field
        ARGS 1 100 16
        OMP 1
        MPI 2
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
        CONDITION HAVE_MPI
    )

    # Two MPI ranks with four OpenMP threads each; needs both features
    ecbuild_add_test(
        TARGET dwarf-cloudsc-fortran-field-mpi-omp
        COMMAND bin/dwarf-cloudsc-fortran-field
        ARGS 4 100 16
        OMP 4
        MPI 2
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../..
        CONDITION HAVE_OMP AND HAVE_MPI
    )

endif()

Loading

0 comments on commit 3d5c82a

Please sign in to comment.