Skip to content

Commit

Permalink
Merge branch 'release/1.4.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
samhatfield committed Jul 23, 2024
2 parents 023f5e3 + 868b71f commit 5e920dd
Show file tree
Hide file tree
Showing 311 changed files with 34,368 additions and 4,447 deletions.
13 changes: 8 additions & 5 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
Authors and Contributors
========================

- P. Courtier (ECMWF)
- W. Deconinck (ECMWF)
- D. Degrauwe (RMI)
- D. Dent (ECMWF)
- P. Dueben (ECMWF)
- R. El Khatib (Meteo France)
- D. Giard (Meteo France)
- J. Hague (ECMWF)
- M. Hamrud (ECMWF)
- S. Hatfield (ECMWF)
- M. Hortal (ECMWF)
- L. Isaksen (ECMWF)
- G. Mozdzynski (ECMWF)
- P. Marguinaud (Meteo France)
- O. Marsden (ECMWF)
- L. Mosimann (NVIDIA)
- G. Mozdzynski (ECMWF)
- A. Mueller (ECMWF)
- M. Hortal (ECMWF)
- P. Courtier (ECMWF)
- D. Degrauwe (RMI)
- D. Giard (Meteo France)
- G. Radnoti (ECMWF)
- D. Salmond (ECMWF)
- Y. Seity (Meteo France)
Expand Down
112 changes: 103 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,50 @@
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

cmake_minimum_required( VERSION 3.12 FATAL_ERROR )
cmake_minimum_required( VERSION 3.18 FATAL_ERROR )
# CMake 3.17 adds INTERFACE link options which get propagated to the link stage,
# even if the target is linked in privately
# CMake 3.18 allows "LINK_LANG_AND_ID" generator expression.

find_package( ecbuild 3.4 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../ecbuild )

project( ectrans LANGUAGES C Fortran )
project( ectrans LANGUAGES C CXX Fortran )
include( ectrans_macros )

set(CMAKE_CXX_STANDARD 17)

ecbuild_enable_fortran( REQUIRED NO_MODULE_DIRECTORY )

### Find (optional) dependencies
### Find (optional) dependencies

ecbuild_find_package( NAME fiat REQUIRED )

ecbuild_add_option( FEATURE MPI
ecbuild_add_option( FEATURE MPI
DESCRIPTION "Support for MPI distributed memory parallelism"
REQUIRED_PACKAGES "MPI COMPONENTS Fortran CXX"
CONDITION fiat_HAVE_MPI )

# OpenMP feature, requested by default; it depends on the OpenMP package
# being found with its Fortran component.
ecbuild_add_option(
  FEATURE           OMP
  DESCRIPTION       "Support for OpenMP shared memory parallelism"
  REQUIRED_PACKAGES "OpenMP COMPONENTS Fortran"
  DEFAULT           ON )

# Workaround for https://gitlab.kitware.com/cmake/cmake/-/issues/23691
# (fixed in CMake 3.25): FindOpenACC fills in OpenACC_<LANG>_FOUND correctly
# but leaves OpenACC_FOUND unset unless C, CXX and Fortran were all found,
# even when only a single language is requested through COMPONENTS.
# The probe only runs when ACC has not been explicitly switched off.
if( CMAKE_VERSION VERSION_LESS "3.25" )
  if( NOT DEFINED ENABLE_ACC OR ENABLE_ACC )
    find_package( OpenACC COMPONENTS Fortran )
    if( OpenACC_Fortran_FOUND )
      # Promote the per-language result to the package-level flag.
      set( OpenACC_FOUND ON )
    endif()
  endif()
endif()

# OpenACC feature, opt-in; it depends on the OpenACC package being found
# with its Fortran component.
ecbuild_add_option(
  FEATURE           ACC
  DESCRIPTION       "Support for using GPUs with OpenACC"
  REQUIRED_PACKAGES "OpenACC COMPONENTS Fortran"
  DEFAULT           OFF )

ecbuild_add_option( FEATURE DOUBLE_PRECISION
DEFAULT ON
DESCRIPTION "Support for Double Precision" )
Expand All @@ -50,18 +73,90 @@ if( NOT HAVE_MKL )
# option() has the signature (<variable> "<help_text>" [value]); pass an
# explicit help text so that OFF is the default value, not the help string.
# NOTE(review): help text assumes this switch controls whether FFTW is looked
# up inside MKL - confirm against the FFTW find module in use.
option( FFTW_ENABLE_MKL "Look for FFTW in MKL" OFF )
endif()

ecbuild_add_option( FEATURE FFTW
ecbuild_add_option( FEATURE CPU
DEFAULT ON
DESCRIPTION "Support for fftw"
REQUIRED_PACKAGES "FFTW COMPONENTS double ${single}" )
DESCRIPTION "Compile CPU version of ectrans"
)

# FFTW is mandatory for the CPU build: the double-precision component is
# always requested, plus whatever ${single} expands to.
if( HAVE_CPU )
  ecbuild_find_package(
    NAME       FFTW
    COMPONENTS double ${single}
    REQUIRED )
endif()

ecbuild_add_option( FEATURE TRANSI
DEFAULT ON
DESCRIPTION "Compile TransI C-interface to trans"
CONDITION HAVE_DOUBLE_PRECISION )
CONDITION HAVE_DOUBLE_PRECISION AND HAVE_CPU )

# Look for a GPU runtime when the GPU feature has been requested, either via
# ECTRANS_ENABLE_GPU or, if that is not defined, via ENABLE_GPU.
# CUDA takes precedence; HIP is only tried when CUDA was not found, and only
# with CMake >= 3.24.
if( ECTRANS_ENABLE_GPU OR (NOT DEFINED ECTRANS_ENABLE_GPU AND ENABLE_GPU) )
  set( HAVE_CUDA 0 )
  set( HAVE_HIP 0 )
  ectrans_find_cuda()   # sets "HAVE_CUDA"
  if( (NOT HAVE_CUDA) AND (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24") )
    ectrans_find_hip()  # sets "HAVE_HIP"
  elseif( NOT HAVE_CUDA )
    message("Cannot enable for HIP language, require CMake >= 3.24")
  endif()
endif()

# GPU feature, opt-in. Its condition needs a GPU runtime (HIP or CUDA), an
# offload programming model (OpenACC or OpenMP) and MPI.
ecbuild_add_option(
  FEATURE     GPU
  DESCRIPTION "Compile GPU version of ectrans (Requires OpenACC or sufficient OpenMP offloading support and MPI)"
  CONDITION   (HAVE_HIP OR HAVE_CUDA) AND (HAVE_ACC OR HAVE_OMP) AND HAVE_MPI
  DEFAULT     OFF )

# Pick the offload programming model for the GPU build: OpenACC is preferred
# over OpenMP when both are available. Nothing is set for non-GPU builds.
if( HAVE_GPU AND HAVE_ACC )
  set( GPU_OFFLOAD "ACC" )
elseif( HAVE_GPU AND HAVE_OMP )
  set( GPU_OFFLOAD "OMP" )
elseif( HAVE_GPU )
  # GPU enabled without any offload model available.
  ecbuild_error("Could not enable GPU as OMP or ACC were not enabled")
endif()

# Cutlass-backed BLAS, opt-in. Conditional on a CUDA GPU build with the NVHPC
# Fortran compiler, and on the NvidiaCutlass package (version 2.11 requested).
ecbuild_add_option(
  FEATURE           CUTLASS
  DESCRIPTION       "Support for Cutlass BLAS operations"
  REQUIRED_PACKAGES "NvidiaCutlass VERSION 2.11"
  CONDITION         HAVE_GPU AND HAVE_CUDA AND CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC"
  DEFAULT           OFF )

# 3xTF32 acceleration through Cutlass for single precision. Requested by
# default, but conditional on single precision and Cutlass being enabled.
# It also needs CUDA architecture sm80 to be effective.
ecbuild_add_option(
  FEATURE     CUTLASS_3XTF32
  DESCRIPTION "Support for 3xTF32 with Cutlass (>= 2.8) and CUDA_ARCHITECTURES >= 80"
  CONDITION   HAVE_SINGLE_PRECISION AND HAVE_CUTLASS
  DEFAULT     ON )

# GPU-aware MPI. Requested by default, conditional on the GPU build, and
# depends on MPI being found with its CXX and Fortran components.
ecbuild_add_option(
  FEATURE           GPU_AWARE_MPI
  DESCRIPTION       "Enable CUDA-aware MPI"
  REQUIRED_PACKAGES "MPI COMPONENTS CXX Fortran"
  CONDITION         HAVE_GPU
  DEFAULT           ON )

# Graph-based scheduling of the Legendre transform GEMM kernel. Requested by
# default, conditional on the GPU build.
ecbuild_add_option(
  FEATURE     GPU_GRAPHS_GEMM
  DESCRIPTION "Enable graph-based optimisation of Legendre transform GEMM kernel"
  CONDITION   HAVE_GPU
  DEFAULT     ON )

# The GPU library defaults to a static build unless shared libraries were
# requested globally through BUILD_SHARED_LIBS.
set( GPU_STATIC_DEFAULT ON )
if( BUILD_SHARED_LIBS )
  set( GPU_STATIC_DEFAULT OFF )
endif()
ecbuild_add_option(
  FEATURE     GPU_STATIC
  DESCRIPTION "Compile GPU library as static library"
  DEFAULT     ${GPU_STATIC_DEFAULT} )

# Locate a LAPACK implementation through a project helper.
# NOTE(review): presumably provided by the ectrans_macros include - confirm.
ectrans_find_lapack()

# Unit tests. Requested by default, conditional on the CPU build, and
# dependent on MPI being found with its Fortran component.
ecbuild_add_option(
  FEATURE           TESTS
  DESCRIPTION       "Enable unit testing"
  REQUIRED_PACKAGES "MPI COMPONENTS Fortran"
  CONDITION         HAVE_CPU
  DEFAULT           ON )

### Add sources and tests
# Pull in the project's compile-option module before descending into the
# source tree.
include( ectrans_compile_options )
add_subdirectory( src )
Expand All @@ -77,4 +172,3 @@ endif()
# Install the project under its own name.
ecbuild_install_project( NAME ${PROJECT_NAME} )

# Print the ecbuild summary.
ecbuild_print_summary()

35 changes: 29 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ Introduction
============

ecTrans is the global spherical harmonics transform library, extracted from the IFS.
It is using a hybrid of MPI and OpenMP parallelisation strategies.
The package contains both single- and double precision Fortran libraries (trans_sp, trans_dp),
as well as a C interface to the double-precision version (transi_dp)
It contains both CPU and GPU (Nvidia) code-paths.
The CPU version uses a hybrid of MPI and OpenMP parallelisation strategies, while the GPU version combines MPI and OpenACC.
A default installation builds both CPU libraries (trans_sp, trans_dp) and various flavours of GPU libraries (trans_gpu_{sp/dp} shared library, trans_gpu_static_{sp/dp} static library, trans_gpu_static_CA_{sp/dp} static library requiring a CUDA-aware MPI implementation), as well as a C interface to the double-precision version (transi_dp). A simple benchmark driver is also built against each of these libraries, allowing simple testing of the transforms.

License
=======

Trans is distributed under the Apache License Version 2.0.
ecTrans is distributed under the Apache License Version 2.0.
See `LICENSE` file for details.

Installing ecTrans
Expand All @@ -26,6 +26,8 @@ Supported Platforms

Other UNIX-like operating systems may work too out of the box.

The GPU codepath has only been tested with NVHPC compilers on recent Nvidia GPUs.

Requirements
------------
- Fortran compiler with OpenMP support
Expand All @@ -38,6 +40,10 @@ Requirements
Further optional recommended dependencies:
- FFTW (http://www.fftw.org)

For the GPU libraries:
- Fortran compiler with OpenACC support
- CUDA toolkit (compiler, and CUBLAS and CUFFT libraries)

Building ecTrans
----------------

Expand Down Expand Up @@ -69,8 +75,21 @@ Extra options can be added to the `cmake` command to control the build:
- `-DENABLE_DOUBLE_PRECISION=<ON|OFF>` default=ON
- `-DENABLE_TRANSI=<ON|OFF>` default=ON
- `-DENABLE_MKL=<ON|OFF>` default=ON
- `-DENABLE_FFTW=<ON|OFF>` default=ON
- `-DENABLE_GPU=<ON|OFF>` default=OFF
- `-DCMAKE_INSTALL_PREFIX=<install-prefix>`

Specific extra options exist for GPU installation:
- `-DENABLE_GPU_AWARE_MPI=<ON|OFF>` default=ON
- `-DENABLE_GPU_GRAPHS_GEMM=<ON|OFF>` default=ON
- `-DENABLE_CUTLASS=<ON|OFF>` default=OFF
- `-DENABLE_CUTLASS_3XTF32=<ON|OFF>` default=ON (only takes effect when Cutlass and single precision are enabled)

GPU-aware MPI allows buffers residing on GPU to be passed to MPI communication calls directly. This requires a compatible MPI installation.
Graph work-flows allow a series of GPU operations to be scheduled in an efficient manner.
This is useful both for the batched FFTs and the batched GEMMs on which ecTrans relies, although for the FFTs graphs are currently always used, so only their use for the GEMMs is optional.
Cutlass is an Nvidia library of templates for GEMM operations. 3xTF32 is a specific acceleration for single precision operations, enabled by Cutlass.

More options to control compilation flags, only when defaults are not sufficient

- `-DCMAKE_Fortran_FLAGS=<fortran-flags>`
Expand All @@ -82,13 +101,17 @@ Optionally, tests can be run to check succesful compilation, when the feature TE

$ ctest

The benchmark drivers are found in the bin directory.
A brief description of available command-line arguments can be obtained with e.g.
ectrans-benchmark-cpu-sp --help

Reporting Bugs
==============

TODO
Please report bugs using a [GitHub issue](https://github.com/ecmwf-ifs/ectrans/issues). Support is given on a best-effort basis by package developers.

Contributing
============

TODO
Contributions to ecTrans are welcome. To contribute, please open a [GitHub issue](https://github.com/ecmwf-ifs/ectrans/issues) where a feature request or bug can be discussed. Then create a [pull request](https://github.com/ecmwf-ifs/ectrans/pulls) with your contribution. All contributors to the pull request need to sign the [contributor license agreement (CLA)](https://claassistant.ecmwf.int/ecmwf-ifs/ectrans).

2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.3.2
1.4.0
Loading

0 comments on commit 5e920dd

Please sign in to comment.