From 78f0805c13867b73421412abdd00c102249437ba Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 19 Oct 2022 18:15:22 +0200 Subject: [PATCH 001/174] cloudsc builds and runs with new field_module, gives correct results --- .../cloudsc_driver_gpu_scc_field_mod.F90 | 109 +- src/common/module/cloudsc_field_state_mod.F90 | 132 +- src/common/module/field_module.F90 | 7528 +++++++++++++---- src/common/module/field_module_orig.F90 | 2189 +++++ src/common/module/yomphyder.F90 | 4 +- 5 files changed, 8058 insertions(+), 1904 deletions(-) create mode 100644 src/common/module/field_module_orig.F90 diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 index cc294065..503f2ed7 100644 --- a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 +++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 @@ -102,59 +102,61 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & ! Global timer for the parallel region CALL TIMER%START(NUMOMP) - PT => GET_DEVICE_DATA(FIELD_STATE%F_PT) - PQ => GET_DEVICE_DATA(FIELD_STATE%F_PQ) - PVFA => GET_DEVICE_DATA(FIELD_STATE%F_PVFA) - PVFL => GET_DEVICE_DATA(FIELD_STATE%F_PVFL) - PVFI => GET_DEVICE_DATA(FIELD_STATE%F_PVFI) - PDYNA => GET_DEVICE_DATA(FIELD_STATE%F_PDYNA) - PDYNL => GET_DEVICE_DATA(FIELD_STATE%F_PDYNL) - PDYNI => GET_DEVICE_DATA(FIELD_STATE%F_PDYNI) - PHRSW => GET_DEVICE_DATA(FIELD_STATE%F_PHRSW) - PHRLW => GET_DEVICE_DATA(FIELD_STATE%F_PHRLW) - PVERVEL => GET_DEVICE_DATA(FIELD_STATE%F_PVERVEL) - PAP => GET_DEVICE_DATA(FIELD_STATE%F_PAP) - PAPH => GET_DEVICE_DATA(FIELD_STATE%F_PAPH) - PLSM => GET_DEVICE_DATA(FIELD_STATE%F_PLSM) - LDCUM => GET_DEVICE_DATA(FIELD_STATE%F_LDCUM) - KTYPE => GET_DEVICE_DATA(FIELD_STATE%F_KTYPE) - PLU => GET_DEVICE_DATA(FIELD_STATE%F_PLU) - PLUDE => GET_DEVICE_DATA(FIELD_STATE%F_PLUDE) - PSNDE => GET_DEVICE_DATA(FIELD_STATE%F_PSNDE) - PMFU => GET_DEVICE_DATA(FIELD_STATE%F_PMFU) - PMFD => GET_DEVICE_DATA(FIELD_STATE%F_PMFD) - PA => GET_DEVICE_DATA(FIELD_STATE%F_PA) - PCLV => GET_DEVICE_DATA(FIELD_STATE%F_PCLV) - PSUPSAT => GET_DEVICE_DATA(FIELD_STATE%F_PSUPSAT) - PLCRIT_AER => GET_DEVICE_DATA(FIELD_STATE%F_PLCRIT_AER) - PICRIT_AER => GET_DEVICE_DATA(FIELD_STATE%F_PICRIT_AER) - PRE_ICE => GET_DEVICE_DATA(FIELD_STATE%F_PRE_ICE) - PCCN => GET_DEVICE_DATA(FIELD_STATE%F_PCCN) - PNICE => GET_DEVICE_DATA(FIELD_STATE%F_PNICE) - PCOVPTOT => GET_DEVICE_DATA(FIELD_STATE%F_PCOVPTOT) - PRAINFRAC_TOPRFZ => GET_DEVICE_DATA(FIELD_STATE%F_PRAINFRAC_TOPRFZ) - PFSQLF => GET_DEVICE_DATA(FIELD_STATE%F_PFSQLF) - PFSQIF => GET_DEVICE_DATA(FIELD_STATE%F_PFSQIF) - PFCQLNG => GET_DEVICE_DATA(FIELD_STATE%F_PFCQLNG) - PFCQNNG => GET_DEVICE_DATA(FIELD_STATE%F_PFCQNNG) - PFSQRF => GET_DEVICE_DATA(FIELD_STATE%F_PFSQRF) - PFSQSF => GET_DEVICE_DATA(FIELD_STATE%F_PFSQSF) - PFCQRNG => GET_DEVICE_DATA(FIELD_STATE%F_PFCQRNG) - PFCQSNG => GET_DEVICE_DATA(FIELD_STATE%F_PFCQSNG) - PFSQLTUR => GET_DEVICE_DATA(FIELD_STATE%F_PFSQLTUR) - PFSQITUR => GET_DEVICE_DATA(FIELD_STATE%F_PFSQITUR) - PFPLSL => GET_DEVICE_DATA(FIELD_STATE%F_PFPLSL) - PFPLSN => GET_DEVICE_DATA(FIELD_STATE%F_PFPLSN) - PFHPSL => GET_DEVICE_DATA(FIELD_STATE%F_PFHPSL) - PFHPSN => GET_DEVICE_DATA(FIELD_STATE%F_PFHPSN) - TEND_LOC_T => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_LOC%F_T) - TEND_LOC_Q => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_LOC%F_Q) - TEND_LOC_A => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_LOC%F_A) - TEND_LOC_CLD => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_LOC%F_CLD) - TEND_TMP_T => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_TMP%F_T) - TEND_TMP_Q => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_TMP%F_Q) - TEND_TMP_A => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_TMP%F_A) - TEND_TMP_CLD => GET_DEVICE_DATA(FIELD_STATE%TENDENCY_TMP%F_CLD) + CALL FIELD_STATE%F_PT%GET_DEVICE_DATA_RDONLY(PT) + CALL FIELD_STATE%F_PQ%GET_DEVICE_DATA_RDONLY(PQ) + CALL FIELD_STATE%F_PVFA%GET_DEVICE_DATA_RDONLY(PVFA) + CALL FIELD_STATE%F_PVFL%GET_DEVICE_DATA_RDONLY(PVFL) + CALL FIELD_STATE%F_PVFI%GET_DEVICE_DATA_RDONLY(PVFI) + CALL FIELD_STATE%F_PDYNA%GET_DEVICE_DATA_RDONLY(PDYNA) + CALL FIELD_STATE%F_PDYNL%GET_DEVICE_DATA_RDONLY(PDYNL) + CALL FIELD_STATE%F_PDYNI%GET_DEVICE_DATA_RDONLY(PDYNI) + CALL FIELD_STATE%F_PHRSW%GET_DEVICE_DATA_RDONLY(PHRSW) + CALL FIELD_STATE%F_PHRLW%GET_DEVICE_DATA_RDONLY(PHRLW) + CALL FIELD_STATE%F_PVERVEL%GET_DEVICE_DATA_RDONLY(PVERVEL) + CALL FIELD_STATE%F_PAP%GET_DEVICE_DATA_RDONLY(PAP) + CALL FIELD_STATE%F_PAPH%GET_DEVICE_DATA_RDONLY(PAPH) + CALL FIELD_STATE%F_PLSM%GET_DEVICE_DATA_RDONLY(PLSM) + CALL FIELD_STATE%F_LDCUM%GET_DEVICE_DATA_RDONLY(LDCUM) + CALL FIELD_STATE%F_KTYPE%GET_DEVICE_DATA_RDONLY(KTYPE) + CALL FIELD_STATE%F_PLU%GET_DEVICE_DATA_RDONLY(PLU) + CALL FIELD_STATE%F_PSNDE%GET_DEVICE_DATA_RDONLY(PSNDE) + CALL FIELD_STATE%F_PMFU%GET_DEVICE_DATA_RDONLY(PMFU) + CALL FIELD_STATE%F_PMFD%GET_DEVICE_DATA_RDONLY(PMFD) + CALL FIELD_STATE%F_PA%GET_DEVICE_DATA_RDONLY(PA) + CALL FIELD_STATE%F_PCLV%GET_DEVICE_DATA_RDONLY(PCLV) + CALL FIELD_STATE%F_PSUPSAT%GET_DEVICE_DATA_RDONLY(PSUPSAT) + CALL FIELD_STATE%F_PLCRIT_AER%GET_DEVICE_DATA_RDONLY(PLCRIT_AER) + CALL FIELD_STATE%F_PICRIT_AER%GET_DEVICE_DATA_RDONLY(PICRIT_AER) + CALL FIELD_STATE%F_PRE_ICE%GET_DEVICE_DATA_RDONLY(PRE_ICE) + CALL FIELD_STATE%F_PCCN%GET_DEVICE_DATA_RDONLY(PCCN) + CALL FIELD_STATE%F_PNICE%GET_DEVICE_DATA_RDONLY(PNICE) + CALL FIELD_STATE%TENDENCY_TMP%F_T%GET_DEVICE_DATA_RDONLY(TEND_TMP_T) + CALL FIELD_STATE%TENDENCY_TMP%F_Q%GET_DEVICE_DATA_RDONLY(TEND_TMP_Q) + CALL FIELD_STATE%TENDENCY_TMP%F_A%GET_DEVICE_DATA_RDONLY(TEND_TMP_A) + CALL FIELD_STATE%TENDENCY_TMP%F_CLD%GET_DEVICE_DATA_RDONLY(TEND_TMP_CLD) + + CALL FIELD_STATE%F_PLUDE%GET_DEVICE_DATA_RDWR(PLUDE) + CALL FIELD_STATE%F_PCOVPTOT%GET_DEVICE_DATA_RDWR(PCOVPTOT) + CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%GET_DEVICE_DATA_RDWR(PRAINFRAC_TOPRFZ) + CALL FIELD_STATE%F_PFSQLF%GET_DEVICE_DATA_RDWR(PFSQLF) + CALL FIELD_STATE%F_PFSQIF%GET_DEVICE_DATA_RDWR(PFSQIF) + CALL FIELD_STATE%F_PFCQLNG%GET_DEVICE_DATA_RDWR(PFCQLNG) + CALL FIELD_STATE%F_PFCQNNG%GET_DEVICE_DATA_RDWR(PFCQNNG) + CALL FIELD_STATE%F_PFSQRF%GET_DEVICE_DATA_RDWR(PFSQRF) + CALL FIELD_STATE%F_PFSQSF%GET_DEVICE_DATA_RDWR(PFSQSF) + CALL FIELD_STATE%F_PFCQRNG%GET_DEVICE_DATA_RDWR(PFCQRNG) + CALL FIELD_STATE%F_PFCQSNG%GET_DEVICE_DATA_RDWR(PFCQSNG) + CALL FIELD_STATE%F_PFSQLTUR%GET_DEVICE_DATA_RDWR(PFSQLTUR) + CALL FIELD_STATE%F_PFSQITUR%GET_DEVICE_DATA_RDWR(PFSQITUR) + CALL FIELD_STATE%F_PFPLSL%GET_DEVICE_DATA_RDWR(PFPLSL) + CALL FIELD_STATE%F_PFPLSN%GET_DEVICE_DATA_RDWR(PFPLSN) + CALL FIELD_STATE%F_PFHPSL%GET_DEVICE_DATA_RDWR(PFHPSL) + CALL FIELD_STATE%F_PFHPSN%GET_DEVICE_DATA_RDWR(PFHPSN) + CALL FIELD_STATE%TENDENCY_LOC%F_T%GET_DEVICE_DATA_RDWR(TEND_LOC_T) + CALL FIELD_STATE%TENDENCY_LOC%F_Q%GET_DEVICE_DATA_RDWR(TEND_LOC_Q) + CALL FIELD_STATE%TENDENCY_LOC%F_A%GET_DEVICE_DATA_RDWR(TEND_LOC_A) + CALL FIELD_STATE%TENDENCY_LOC%F_CLD%GET_DEVICE_DATA_RDWR(TEND_LOC_CLD) +!$acc data copyin(yrecldp) ! Local timer for each thread TID = GET_THREAD_NUM() @@ -192,6 +194,7 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & ENDDO !$acc end parallel loop +!$acc end data CALL TIMER%THREAD_END(TID) diff --git a/src/common/module/cloudsc_field_state_mod.F90 b/src/common/module/cloudsc_field_state_mod.F90 index 285da32b..23b9a994 100644 --- a/src/common/module/cloudsc_field_state_mod.F90 +++ b/src/common/module/cloudsc_field_state_mod.F90 @@ -9,7 +9,7 @@ MODULE CLOUDSC_FIELD_STATE_MOD ! Driver module to manage the setup and teardown of the field-based state - USE PARKIND1, ONLY : JPIM, JPRB + USE PARKIND1, ONLY : JPIM, JPRB, JPLM USE YOMPHYDER, ONLY : STATE_TYPE USE YOECLDP, ONLY : NCLV, YRECLDP, YRECLDP_LOAD_PARAMETERS USE YOMCST, ONLY : YOMCST_LOAD_PARAMETERS @@ -20,7 +20,7 @@ MODULE CLOUDSC_FIELD_STATE_MOD USE EXPAND_MOD, ONLY: EXPAND, LOAD_AND_EXPAND, LOAD_AND_EXPAND_STATE, GET_OFFSETS USE VALIDATE_MOD, ONLY: VALIDATE USE CLOUDSC_MPI_MOD, ONLY: IRANK - USE FIELD_MODULE, ONLY: FIELD_2D, FIELD_3D, FIELD_4D, FIELD_INT2D, FIELD_LOG2D, malloc_host_pinned_4d + USE FIELD_MODULE, ONLY: FIELD_2D, FIELD_2D_OWNER, FIELD_3D, FIELD_3D_WRAPPER_PACKED, FIELD_3D_OWNER, FIELD_4D, FIELD_4D_OWNER, FIELD_INT2D, FIELD_INT2D_OWNER, FIELD_LOG2D, FIELD_LOG2D_OWNER, MALLOC_HOST_PINNED_4D IMPLICIT NONE @@ -28,8 +28,8 @@ MODULE CLOUDSC_FIELD_STATE_MOD INTEGER(KIND=JPIM) :: NPROMA, KLEV ! Grid points and vertical levels per block INTEGER(KIND=JPIM) :: NGPTOT, NBLOCKS ! Total number of grid points and blocks INTEGER(KIND=JPIM) :: KFLDX - LOGICAL :: LDSLPHY - LOGICAL :: LDMAINCALL ! T if main call to cloudsc + LOGICAL(KIND=JPLM) :: LDSLPHY + LOGICAL(KIND=JPLM) :: LDMAINCALL ! T if main call to cloudsc REAL(KIND=JPRB) :: PTSPHY ! Physics timestep TYPE(STATE_TYPE) :: TENDENCY_LOC, TENDENCY_TMP @@ -45,15 +45,16 @@ MODULE CLOUDSC_FIELD_STATE_MOD REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA_RWONLY(:,:,:,:) ! Storage fields to provide thread-local views - TYPE(FIELD_2D), POINTER :: F_PRAINFRAC_TOPRFZ, F_PLSM - TYPE(FIELD_INT2D), POINTER :: F_KTYPE - TYPE(FIELD_LOG2D), POINTER :: F_LDCUM - TYPE(FIELD_3D), POINTER :: F_PLCRIT_AER, F_PICRIT_AER, F_PRE_ICE, F_PCCN, F_PNICE, F_PT, F_PQ, & + CLASS(FIELD_2D), POINTER :: F_PRAINFRAC_TOPRFZ, F_PLSM + CLASS(FIELD_INT2D), POINTER :: F_KTYPE + CLASS(FIELD_LOG2D), POINTER :: F_LDCUM + CLASS(FIELD_3D), POINTER :: F_PLCRIT_AER, F_PICRIT_AER, F_PRE_ICE, F_PCCN, F_PNICE, F_PT, F_PQ, & & F_PVFA, F_PVFL, F_PVFI, F_PDYNA, F_PDYNL, F_PDYNI, F_PHRSW, F_PHRLW, F_PVERVEL, F_PAP, F_PAPH, & & F_PLU, F_PLUDE, F_PSNDE, F_PMFU, F_PMFD, F_PA, F_PSUPSAT, F_PCOVPTOT - TYPE(FIELD_3D), POINTER :: F_PFSQLF, F_PFSQIF, F_PFCQLNG, F_PFCQNNG, F_PFSQRF, F_PFSQSF, F_PFCQRNG, & + CLASS(FIELD_3D), POINTER :: F_PFSQLF, F_PFSQIF, F_PFCQLNG, F_PFCQNNG, F_PFSQRF, F_PFSQSF, F_PFCQRNG, & & F_PFCQSNG, F_PFSQLTUR, F_PFSQITUR, F_PFPLSL, F_PFPLSN, F_PFHPSL, F_PFHPSN - TYPE(FIELD_4D), POINTER :: F_PCLV, F_TENDENCY_TMP, F_TENDENCY_LOC +! CLASS(FIELD_4D), POINTER :: F_PCLV, F_TENDENCY_TMP, F_TENDENCY_LOC + CLASS(FIELD_4D), POINTER :: F_PCLV CONTAINS PROCEDURE :: LOAD => CLOUDSC_FIELD_STATE_LOAD PROCEDURE :: VALIDATE => CLOUDSC_FIELD_STATE_VALIDATE @@ -67,40 +68,49 @@ MODULE CLOUDSC_FIELD_STATE_MOD CONTAINS FUNCTION CREATE_FIELD_ALLOCATE_INT2D(SHAPE, NBLOCKS) RESULT(FIELD_PTR) - TYPE(FIELD_INT2D), POINTER :: FIELD_PTR + CLASS(FIELD_INT2D_OWNER), POINTER :: FIELD_PTR INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1), NBLOCKS INTEGER(KIND=JPIM) :: B + INTEGER(KIND=JPIM) :: LBOUNDS(2),UBOUNDS(2) + + LBOUNDS(1) = 1 + LBOUNDS(2) = 1 + UBOUNDS(1) = SHAPE(1) + UBOUNDS(2) = NBLOCKS ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_INT2D(SHAPE=SHAPE, NBLOCKS=NBLOCKS, PERSISTENT=.TRUE.) - ! Due to Fortran's reallocation-on-assignment behaviour, the - ! FIELD_PTR%PTR pointer needs updating here for owned objects. - IF (FIELD_PTR%OWNED) FIELD_PTR%PTR => FIELD_PTR%DATA + CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) END FUNCTION CREATE_FIELD_ALLOCATE_INT2D FUNCTION CREATE_FIELD_ALLOCATE_LOG2D(SHAPE, NBLOCKS) RESULT(FIELD_PTR) - TYPE(FIELD_LOG2D), POINTER :: FIELD_PTR + CLASS(FIELD_LOG2D_OWNER), POINTER :: FIELD_PTR INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1), NBLOCKS INTEGER(KIND=JPIM) :: B + INTEGER(KIND=JPIM) :: LBOUNDS(2),UBOUNDS(2) + + LBOUNDS(1) = 1 + LBOUNDS(2) = 1 + UBOUNDS(1) = SHAPE(1) + UBOUNDS(2) = NBLOCKS ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_LOG2D(SHAPE=SHAPE, NBLOCKS=NBLOCKS, PERSISTENT=.TRUE.) - ! Due to Fortran's reallocation-on-assignment behaviour, the - ! FIELD_PTR%PTR pointer needs updating here for owned objects. - IF (FIELD_PTR%OWNED) FIELD_PTR%PTR => FIELD_PTR%DATA + CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) END FUNCTION CREATE_FIELD_ALLOCATE_LOG2D FUNCTION CREATE_FIELD_ALLOCATE_2D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) - TYPE(FIELD_2D), POINTER :: FIELD_PTR + CLASS(FIELD_2D_OWNER), POINTER :: FIELD_PTR INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1), NBLOCKS LOGICAL, OPTIONAL, INTENT(IN) :: ZERO INTEGER(KIND=JPIM) :: B + INTEGER(KIND=JPIM) :: LBOUNDS(2), UBOUNDS(2) + + LBOUNDS(1) = 1 + LBOUNDS(2) = 1 + UBOUNDS(1) = SHAPE(1) + UBOUNDS(2) = NBLOCKS ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_2D(SHAPE=SHAPE, NBLOCKS=NBLOCKS, PERSISTENT=.TRUE.) - ! Due to Fortran's reallocation-on-assignment behaviour, the - ! FIELD_PTR%PTR pointer needs updating here for owned objects. - IF (FIELD_PTR%OWNED) FIELD_PTR%PTR => FIELD_PTR%DATA + CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) IF (PRESENT(ZERO)) THEN IF (ZERO) THEN @@ -114,16 +124,21 @@ FUNCTION CREATE_FIELD_ALLOCATE_2D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END FUNCTION CREATE_FIELD_ALLOCATE_2D FUNCTION CREATE_FIELD_ALLOCATE_3D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) - TYPE(FIELD_3D), POINTER :: FIELD_PTR + CLASS(FIELD_3D_OWNER), POINTER :: FIELD_PTR INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2), NBLOCKS LOGICAL, OPTIONAL, INTENT(IN) :: ZERO INTEGER(KIND=JPIM) :: B + INTEGER(KIND=JPIM) :: LBOUNDS(3), UBOUNDS(3) + + LBOUNDS(1) = 1 + LBOUNDS(2) = 1 + LBOUNDS(3) = 1 + UBOUNDS(1) = SHAPE(1) + UBOUNDS(2) = SHAPE(2) + UBOUNDS(3) = NBLOCKS ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_3D(SHAPE=SHAPE, NBLOCKS=NBLOCKS, PERSISTENT=.TRUE.) - ! Due to Fortran's reallocation-on-assignment behaviour, the - ! FIELD_PTR%PTR pointer needs updating here for owned objects. - IF (FIELD_PTR%OWNED) FIELD_PTR%PTR => FIELD_PTR%DATA + CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) IF (PRESENT(ZERO)) THEN IF (ZERO) THEN @@ -137,16 +152,23 @@ FUNCTION CREATE_FIELD_ALLOCATE_3D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END FUNCTION CREATE_FIELD_ALLOCATE_3D FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) - TYPE(FIELD_4D), POINTER :: FIELD_PTR + CLASS(FIELD_4D_OWNER), POINTER :: FIELD_PTR INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3), NBLOCKS LOGICAL, OPTIONAL, INTENT(IN) :: ZERO INTEGER(KIND=JPIM) :: B + INTEGER(KIND=JPIM) :: LBOUNDS(4), UBOUNDS(4) + + LBOUNDS(1) = 1 + LBOUNDS(2) = 1 + LBOUNDS(3) = 1 + LBOUNDS(4) = 1 + UBOUNDS(1) = SHAPE(1) + UBOUNDS(2) = SHAPE(2) + UBOUNDS(3) = SHAPE(3) + UBOUNDS(4) = NBLOCKS ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_4D(SHAPE=SHAPE, NBLOCKS=NBLOCKS, PERSISTENT=.TRUE.) - ! Due to Fortran's reallocation-on-assignment behaviour, the - ! FIELD_PTR%PTR pointer needs updating here for owned objects. - IF (FIELD_PTR%OWNED) FIELD_PTR%PTR => FIELD_PTR%DATA + CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) IF (PRESENT(ZERO)) THEN IF (ZERO) THEN @@ -159,29 +181,29 @@ FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END IF END FUNCTION CREATE_FIELD_ALLOCATE_4D - FUNCTION CREATE_FIELD_WRAP_PACKED_2D(DATA, IDX) RESULT(FIELD_PTR) - ! Create a single 2D field with implicit blocking dimension by wrapping existing data - TYPE(FIELD_2D), POINTER :: FIELD_PTR - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_2D(DATA=DATA, IDX=IDX) - END FUNCTION CREATE_FIELD_WRAP_PACKED_2D +! FUNCTION CREATE_FIELD_WRAP_PACKED_2D(DATA, IDX) RESULT(FIELD_PTR) +! ! Create a single 2D field with implicit blocking dimension by wrapping existing data +! TYPE(FIELD_2D), POINTER :: FIELD_PTR +! REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) +! INTEGER(KIND=JPIM), INTENT(IN) :: IDX +! +! ALLOCATE(FIELD_PTR) +! FIELD_PTR = FIELD_2D(DATA=DATA, IDX=IDX) +! END FUNCTION CREATE_FIELD_WRAP_PACKED_2D FUNCTION CREATE_FIELD_WRAP_PACKED_3D(DATA, IDX) RESULT(FIELD_PTR) ! Create a single 1D field with implicit blocking dimension by wrapping existing data - TYPE(FIELD_3D), POINTER :: FIELD_PTR + CLASS(FIELD_3D_WRAPPER_PACKED), POINTER :: FIELD_PTR REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) INTEGER(KIND=JPIM), INTENT(IN) :: IDX ALLOCATE(FIELD_PTR) - FIELD_PTR = FIELD_3D(DATA=DATA, IDX=IDX) + CALL FIELD_PTR%INIT(DATA=DATA, IDX=IDX) END FUNCTION CREATE_FIELD_WRAP_PACKED_3D SUBROUTINE LOAD_AND_EXPAND_FIELD_2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - TYPE(FIELD_2D), INTENT(INOUT) :: FIELD + CLASS(FIELD_2D), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:) @@ -196,7 +218,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_2D SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - TYPE(FIELD_INT2D), INTENT(INOUT) :: FIELD + CLASS(FIELD_INT2D), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG INTEGER(KIND=JPIM), ALLOCATABLE :: BUFFER(:) @@ -209,12 +231,12 @@ SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCK DEALLOCATE(BUFFER) END SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D - SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - TYPE(FIELD_LOG2D), INTENT(INOUT) :: FIELD + CLASS(FIELD_LOG2D), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG - LOGICAL, ALLOCATABLE :: BUFFER(:) + LOGICAL(KIND=JPLM), ALLOCATABLE :: BUFFER(:) INTEGER(KIND=JPIM) :: START, END, SIZE CALL GET_OFFSETS(START, END, SIZE, NLON, 1, 1, NGPTOT, NGPTOTG) @@ -226,7 +248,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D SUBROUTINE LOAD_AND_EXPAND_FIELD_3D(NAME, FIELD, NLON, NLEV, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - TYPE(FIELD_3D), INTENT(INOUT) :: FIELD + CLASS(FIELD_3D), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NLEV, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:,:) @@ -241,7 +263,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_3D SUBROUTINE LOAD_AND_EXPAND_FIELD_4D(NAME, FIELD, NLON, NLEV, NDIM, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - TYPE(FIELD_4D), INTENT(INOUT) :: FIELD + CLASS(FIELD_4D), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NLEV,NDIM, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:,:,:) @@ -349,7 +371,7 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) ! Allocate bulk buffers for read-only input 3D fields NFIELDS = 24 ! ALLOCATE(SELF%DATA_RDONLY(NPROMA, SELF%KLEV, NFIELDS, SELF%NBLOCKS)) - SELF%DATA_RDONLY => malloc_host_pinned_4d([NPROMA, SELF%KLEV, NFIELDS], SELF%NBLOCKS) + SELF%DATA_RDONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV, NFIELDS], SELF%NBLOCKS) SELF%F_PT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=1) SELF%F_PQ => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=2) @@ -388,7 +410,7 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) ! Allocate bulk buffers for output 3D fields NFIELDS = 14 ! CALL FIELD_INIT(SELF%DATA_RWONLY, NPROMA, SELF%KLEV+1, NFIELDS, SELF%NBLOCKS) - SELF%DATA_RWONLY => malloc_host_pinned_4d([NPROMA, SELF%KLEV+1, NFIELDS], SELF%NBLOCKS) + SELF%DATA_RWONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV+1, NFIELDS], SELF%NBLOCKS) SELF%DATA_RWONLY(:,:,:,:) = 0.0_JPRB SELF%F_PFSQLF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=1) diff --git a/src/common/module/field_module.F90 b/src/common/module/field_module.F90 index e2268014..ca1e3101 100644 --- a/src/common/module/field_module.F90 +++ b/src/common/module/field_module.F90 @@ -1,11 +1,7 @@ -! (C) Copyright 1988- ECMWF. +! Rank and shape definitions for simple templating ! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. +! Note that the ranks encode coneptual dimensions here, eg. FIELD_2D encodes +! a surface field and FIELD_3D represents a field with a vertical component. MODULE FIELD_MODULE ! The FIELD types provided by this module provide data abstractions that @@ -15,29 +11,34 @@ MODULE FIELD_MODULE ! incorporated into Atlas. They can also provide backward-compatibility ! for non-Atlas execution modes. -USE PARKIND1, ONLY: JPIM, JPRB +USE PARKIND1, ONLY: JPIM, JPRB, JPLM USE OML_MOD, ONLY: OML_MAX_THREADS, OML_MY_THREAD USE IEEE_ARITHMETIC, ONLY: IEEE_SIGNALING_NAN USE CUDAFOR +USE ISO_C_BINDING +USE OPENACC -use openacc +IMPLICIT NONE -use iso_c_binding +INTEGER (KIND=JPIM), PARAMETER :: NDEVFRESH = INT(B'00000001', JPIM), NHSTFRESH = INT(B'00000010', JPIM) +INTEGER (KIND=JPIM), PARAMETER, PRIVATE :: NH2D = 1, ND2H = 2, NRD = INT(B'00000001', JPIM), NWR = INT(B'00000010', JPIM) -IMPLICIT NONE +TYPE GPU_STATS + INTEGER :: TRANSFER_CPU_TO_GPU = 0 + INTEGER :: TRANSFER_GPU_TO_CPU = 0 + REAL :: TOTAL_TIME_TRANSFER_CPU_TO_GPU = 0 + REAL :: TOTAL_TIME_TRANSFER_GPU_TO_CPU = 0 + CONTAINS + PROCEDURE :: INC_CPU_TO_GPU_TRANSFER + PROCEDURE :: INC_GPU_TO_CPU_TRANSFER +END TYPE GPU_STATS -TYPE FIELD_2D +TYPE, ABSTRACT :: FIELD_2D ! A FIELD encapsulates a single multi-dimensional array and can ! provide block-indexed "views" of the data for automating the ! allocation and parallel iterration of NPROMA blocks. - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - REAL(KIND=JPRB), POINTER :: VIEW(:) => NULL() - ! TODO: Atlas-based field data storage field ! TODO: Do we still need to use pointers here? ! TYPE(ATLAS_FIELD), POINTER :: DATA @@ -48,62 +49,82 @@ MODULE FIELD_MODULE ! where the innermost dimension represents the horizontal and ! the outermost one is the block index. REAL(KIND=JPRB), POINTER :: PTR(:,:) => NULL() - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:) - ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:) => NULL() ! A separate data pointer that can be used to create ! a contiguous chunk of host memory to cleanly map to ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS + REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:) - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. ! Flag indicating the use a single block-buffer per thread LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS CONTAINS - PROCEDURE :: CLONE => FIELD_2D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_2D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_2D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_2D_GET_VIEW PROCEDURE :: FINAL => FIELD_2D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_2D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_2D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_2D_ENSURE_HOST + PROCEDURE :: FIELD_2D_FINAL + PROCEDURE :: GET_VIEW => FIELD_2D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_2D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_2D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_2D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_2D_GET_DEVICE_DATA_RDWR PROCEDURE :: DELETE_DEVICE => FIELD_2D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_2D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_2D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_2D_GET_HOST_DATA END TYPE FIELD_2D -TYPE FIELD_3D +TYPE, EXTENDS(FIELD_2D) :: FIELD_2D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_2D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_2D_WRAPPER_FINAL +END TYPE FIELD_2D_WRAPPER + +TYPE, EXTENDS(FIELD_2D) :: FIELD_2D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_2D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_2D_OWNER_FINAL +END TYPE FIELD_2D_OWNER + +TYPE, EXTENDS(FIELD_2D) :: FIELD_2D_WRAPPER_PACKED + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_2D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_2D_WRAPPER_PACKED_FINAL +END TYPE FIELD_2D_WRAPPER_PACKED + +TYPE FIELD_2D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_2D), POINTER :: PTR => NULL() +END TYPE FIELD_2D_PTR + +TYPE FIELD_2D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:) => NULL() +END TYPE FIELD_2D_VIEW + +TYPE, ABSTRACT :: FIELD_3D ! A FIELD encapsulates a single multi-dimensional array and can ! provide block-indexed "views" of the data for automating the ! allocation and parallel iterration of NPROMA blocks. - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - REAL(KIND=JPRB), POINTER :: VIEW(:,:) => NULL() - ! TODO: Atlas-based field data storage field ! TODO: Do we still need to use pointers here? ! TYPE(ATLAS_FIELD), POINTER :: DATA @@ -114,64 +135,82 @@ MODULE FIELD_MODULE ! where the innermost dimension represents the horizontal and ! the outermost one is the block index. REAL(KIND=JPRB), POINTER :: PTR(:,:,:) => NULL() - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:) - ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX - - ! ! A separate data pointer that can be used to create - ! ! a contiguous chunk of host memory to cleanly map to - ! ! device, should the %DATA pointer be discontiguous. - ! REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:) => NULL() + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. ! Flag indicating the use a single block-buffer per thread LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS CONTAINS - PROCEDURE :: CLONE => FIELD_3D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_3D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_3D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_3D_GET_VIEW PROCEDURE :: FINAL => FIELD_3D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_3D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_3D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_3D_ENSURE_HOST + PROCEDURE :: FIELD_3D_FINAL + PROCEDURE :: GET_VIEW => FIELD_3D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_3D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_3D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_3D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_3D_GET_DEVICE_DATA_RDWR PROCEDURE :: DELETE_DEVICE => FIELD_3D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_3D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_3D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_3D_GET_HOST_DATA END TYPE FIELD_3D -TYPE FIELD_4D +TYPE, EXTENDS(FIELD_3D) :: FIELD_3D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_3D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_3D_WRAPPER_FINAL +END TYPE FIELD_3D_WRAPPER + +TYPE, EXTENDS(FIELD_3D) :: FIELD_3D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_3D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_3D_OWNER_FINAL +END TYPE FIELD_3D_OWNER + +TYPE, EXTENDS(FIELD_3D) :: FIELD_3D_WRAPPER_PACKED + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_3D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_3D_WRAPPER_PACKED_FINAL +END TYPE FIELD_3D_WRAPPER_PACKED + +TYPE FIELD_3D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_3D), POINTER :: PTR => NULL() +END TYPE FIELD_3D_PTR + +TYPE FIELD_3D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:,:) => NULL() +END TYPE FIELD_3D_VIEW + +TYPE, ABSTRACT :: FIELD_4D ! A FIELD encapsulates a single multi-dimensional array and can ! provide block-indexed "views" of the data for automating the ! allocation and parallel iterration of NPROMA blocks. - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - REAL(KIND=JPRB), POINTER :: VIEW(:,:,:) => NULL() - ! TODO: Atlas-based field data storage field ! TODO: Do we still need to use pointers here? ! TYPE(ATLAS_FIELD), POINTER :: DATA @@ -182,63 +221,82 @@ MODULE FIELD_MODULE ! where the innermost dimension represents the horizontal and ! the outermost one is the block index. REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) => NULL() - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:) - ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:,:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:) => NULL() ! A separate data pointer that can be used to create ! a contiguous chunk of host memory to cleanly map to ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS + REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:) - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. ! Flag indicating the use a single block-buffer per thread LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS CONTAINS - PROCEDURE :: CLONE => FIELD_4D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_4D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_4D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_4D_GET_VIEW PROCEDURE :: FINAL => FIELD_4D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_4D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_4D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_4D_ENSURE_HOST + PROCEDURE :: FIELD_4D_FINAL + PROCEDURE :: GET_VIEW => FIELD_4D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_4D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_4D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_4D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_4D_GET_DEVICE_DATA_RDWR PROCEDURE :: DELETE_DEVICE => FIELD_4D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_4D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_4D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_4D_GET_HOST_DATA END TYPE FIELD_4D +TYPE, EXTENDS(FIELD_4D) :: FIELD_4D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_4D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_4D_WRAPPER_FINAL +END TYPE FIELD_4D_WRAPPER + +TYPE, EXTENDS(FIELD_4D) :: FIELD_4D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_4D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_4D_OWNER_FINAL +END TYPE FIELD_4D_OWNER + +TYPE, EXTENDS(FIELD_4D) :: FIELD_4D_WRAPPER_PACKED + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_4D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_4D_WRAPPER_PACKED_FINAL +END TYPE FIELD_4D_WRAPPER_PACKED + +TYPE FIELD_4D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_4D), POINTER :: PTR => NULL() +END TYPE FIELD_4D_PTR + +TYPE FIELD_4D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:,:,:) => NULL() +END TYPE FIELD_4D_VIEW -TYPE FIELD_INT2D +TYPE, ABSTRACT :: FIELD_5D ! A FIELD encapsulates a single multi-dimensional array and can ! provide block-indexed "views" of the data for automating the ! allocation and parallel iterration of NPROMA blocks. - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - INTEGER(KIND=JPIM), POINTER :: VIEW(:) => NULL() - ! TODO: Atlas-based field data storage field ! TODO: Do we still need to use pointers here? ! TYPE(ATLAS_FIELD), POINTER :: DATA @@ -248,64 +306,83 @@ MODULE FIELD_MODULE ! The underlying storage pointer has the rank as the dimension, ! where the innermost dimension represents the horizontal and ! the outermost one is the block index. - INTEGER(KIND=JPIM), POINTER :: PTR(:,:) => NULL() - ! INTEGER(KIND=JPIM), ALLOCATABLE :: DATA(:,:) - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:) - ! INTEGER(KIND=JPIM), ALLOCATABLE, PINNED :: DATA(:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:,:) => NULL() + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:,:) => NULL() + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:,:) => NULL() ! A separate data pointer that can be used to create ! a contiguous chunk of host memory to cleanly map to ! device, should the %DATA pointer be discontiguous. - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS + REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:,:) - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. ! Flag indicating the use a single block-buffer per thread LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS CONTAINS - PROCEDURE :: CLONE => FIELD_INT2D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_INT2D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_INT2D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_INT2D_GET_VIEW - PROCEDURE :: FINAL => FIELD_INT2D_FINAL + PROCEDURE :: FINAL => FIELD_5D_FINAL + PROCEDURE :: FIELD_5D_FINAL + PROCEDURE :: GET_VIEW => FIELD_5D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_5D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_5D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_5D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_5D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_5D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_5D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_5D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_5D_GET_HOST_DATA +END TYPE FIELD_5D + +TYPE, EXTENDS(FIELD_5D) :: FIELD_5D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_5D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_5D_WRAPPER_FINAL +END TYPE FIELD_5D_WRAPPER + +TYPE, EXTENDS(FIELD_5D) :: FIELD_5D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_5D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_5D_OWNER_FINAL +END TYPE FIELD_5D_OWNER + +TYPE, EXTENDS(FIELD_5D) :: FIELD_5D_WRAPPER_PACKED + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_5D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_5D_WRAPPER_PACKED_FINAL +END TYPE FIELD_5D_WRAPPER_PACKED - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_INT2D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_INT2D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_INT2D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_INT2D_DELETE_DEVICE -END TYPE FIELD_INT2D +TYPE FIELD_5D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_5D), POINTER :: PTR => NULL() +END TYPE FIELD_5D_PTR +TYPE FIELD_5D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:,:,:,:) => NULL() +END TYPE FIELD_5D_VIEW -TYPE FIELD_LOG2D +TYPE, ABSTRACT :: FIELD_INT2D ! A FIELD encapsulates a single multi-dimensional array and can ! provide block-indexed "views" of the data for automating the ! allocation and parallel iterration of NPROMA blocks. - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - LOGICAL, POINTER :: VIEW(:) => NULL() - ! TODO: Atlas-based field data storage field ! TODO: Do we still need to use pointers here? ! TYPE(ATLAS_FIELD), POINTER :: DATA @@ -315,1875 +392,5738 @@ MODULE FIELD_MODULE ! The underlying storage pointer has the rank as the dimension, ! where the innermost dimension represents the horizontal and ! the outermost one is the block index. - LOGICAL, POINTER :: PTR(:,:) => NULL() - ! LOGICAL, ALLOCATABLE :: DATA(:,:) - LOGICAL, POINTER, CONTIGUOUS :: DATA(:,:) - ! LOGICAL, ALLOCATABLE, PINNED :: DATA(:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - LOGICAL, POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX + INTEGER(KIND=JPIM), POINTER :: PTR(:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:) => NULL() ! A separate data pointer that can be used to create ! a contiguous chunk of host memory to cleanly map to ! device, should the %DATA pointer be discontiguous. - LOGICAL, POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:) - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. ! Flag indicating the use a single block-buffer per thread LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS CONTAINS - PROCEDURE :: CLONE => FIELD_LOG2D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_LOG2D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_LOG2D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_LOG2D_GET_VIEW - PROCEDURE :: FINAL => FIELD_LOG2D_FINAL + PROCEDURE :: FINAL => FIELD_INT2D_FINAL + PROCEDURE :: FIELD_INT2D_FINAL + PROCEDURE :: GET_VIEW => FIELD_INT2D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT2D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT2D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT2D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT2D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_INT2D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_INT2D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_LOG2D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_LOG2D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_LOG2D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_LOG2D_DELETE_DEVICE -END TYPE FIELD_LOG2D + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT2D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT2D_GET_HOST_DATA +END TYPE FIELD_INT2D +TYPE, EXTENDS(FIELD_INT2D) :: FIELD_INT2D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_INT2D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_INT2D_WRAPPER_FINAL +END TYPE FIELD_INT2D_WRAPPER + +TYPE, EXTENDS(FIELD_INT2D) :: FIELD_INT2D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_INT2D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_INT2D_OWNER_FINAL +END TYPE FIELD_INT2D_OWNER -TYPE FIELD_2D_PTR - ! Struct to hold references to field objects - TYPE(FIELD_2D), POINTER :: PTR => NULL() -END TYPE FIELD_2D_PTR +TYPE, EXTENDS(FIELD_INT2D) :: FIELD_INT2D_WRAPPER_PACKED + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_INT2D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_INT2D_WRAPPER_PACKED_FINAL +END TYPE FIELD_INT2D_WRAPPER_PACKED -TYPE FIELD_2D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:) => NULL() -END TYPE FIELD_2D_VIEW -TYPE FIELD_3D_PTR +TYPE FIELD_INT2D_PTR ! Struct to hold references to field objects - TYPE(FIELD_3D), POINTER :: PTR => NULL() -END TYPE FIELD_3D_PTR + CLASS(FIELD_INT2D), POINTER :: PTR => NULL() +END TYPE FIELD_INT2D_PTR -TYPE FIELD_3D_VIEW +TYPE FIELD_INT2D_VIEW ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:) => NULL() -END TYPE FIELD_3D_VIEW -TYPE FIELD_4D_PTR - ! Struct to hold references to field objects - TYPE(FIELD_4D), POINTER :: PTR => NULL() -END TYPE FIELD_4D_PTR + INTEGER(KIND=JPIM), POINTER :: P(:) => NULL() +END TYPE FIELD_INT2D_VIEW -TYPE FIELD_4D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:,:) => NULL() -END TYPE FIELD_4D_VIEW +TYPE, ABSTRACT :: FIELD_INT3D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA -INTERFACE FIELD_2D - MODULE PROCEDURE :: FIELD_2D_WRAP - MODULE PROCEDURE :: FIELD_2D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_2D_EMPTY - MODULE PROCEDURE :: FIELD_2D_ALLOCATE -END INTERFACE - -INTERFACE FIELD_3D - MODULE PROCEDURE :: FIELD_3D_WRAP - MODULE PROCEDURE :: FIELD_3D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_3D_EMPTY - MODULE PROCEDURE :: FIELD_3D_ALLOCATE -END INTERFACE - -INTERFACE FIELD_4D - MODULE PROCEDURE :: FIELD_4D_WRAP - MODULE PROCEDURE :: FIELD_4D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_4D_EMPTY - MODULE PROCEDURE :: FIELD_4D_ALLOCATE -END INTERFACE - - -INTERFACE FIELD_INT2D - MODULE PROCEDURE :: FIELD_INT2D_WRAP - MODULE PROCEDURE :: FIELD_INT2D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_INT2D_EMPTY - MODULE PROCEDURE :: FIELD_INT2D_ALLOCATE -END INTERFACE - - -INTERFACE FIELD_LOG2D - MODULE PROCEDURE :: FIELD_LOG2D_WRAP - MODULE PROCEDURE :: FIELD_LOG2D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_LOG2D_EMPTY - MODULE PROCEDURE :: FIELD_LOG2D_ALLOCATE -END INTERFACE - - -INTERFACE FILL_BUFFER - MODULE PROCEDURE :: FILL_BUFFER_2D, FILL_BUFFER_3D, FILL_BUFFER_4D - MODULE PROCEDURE :: FILL_BUFFER_INT2D, FILL_BUFFER_LOG2D -END INTERFACE FILL_BUFFER - -INTERFACE FIELD_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_2D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_3D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_4D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_CREATE_DEVICE -END INTERFACE FIELD_CREATE_DEVICE - -INTERFACE FIELD_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_2D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_3D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_4D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_UPDATE_DEVICE -END INTERFACE FIELD_UPDATE_DEVICE - -INTERFACE FIELD_UPDATE_HOST - MODULE PROCEDURE :: FIELD_2D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_3D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_4D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_INT2D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_LOG2D_UPDATE_HOST -END INTERFACE FIELD_UPDATE_HOST - -INTERFACE FIELD_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_2D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_3D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_4D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_DELETE_DEVICE -END INTERFACE FIELD_DELETE_DEVICE - -INTERFACE GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_2D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_3D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_4D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_INT2D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_LOG2D_GET_DEVICE_DATA -END INTERFACE GET_DEVICE_DATA - -INTERFACE FIELD_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_2D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_3D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_4D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_ENSURE_DEVICE -END INTERFACE FIELD_ENSURE_DEVICE - -INTERFACE FIELD_ENSURE_HOST - MODULE PROCEDURE :: FIELD_2D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_3D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_4D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_INT2D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_LOG2D_ENSURE_HOST -END INTERFACE FIELD_ENSURE_HOST + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:,:) => NULL() -CONTAINS + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) - function malloc_host_pinned_2d(shape, nblocks) result(ptr) - integer(kind=jpim), intent(in) :: shape(1) - integer(kind=jpim), intent(in), optional :: nblocks - real(kind=jprb), pointer, contiguous :: ptr(:,:) - - integer(kind=jpim) :: istat, arrsize - type(c_ptr) :: hptr - - arrsize = shape(1) * nblocks * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" - call c_f_pointer(hptr, ptr, [shape(1), nblocks] ) - end function malloc_host_pinned_2d - - function malloc_host_pinned_3d(shape, nblocks) result(ptr) - integer(kind=jpim), intent(in) :: shape(2) - integer(kind=jpim), intent(in), optional :: nblocks - real(kind=jprb), pointer, contiguous :: ptr(:,:,:) - - integer(kind=jpim) :: istat, arrsize - type(c_ptr) :: hptr - - arrsize = shape(1) * shape(2) * nblocks * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" - call c_f_pointer(hptr, ptr, [shape(1), shape(2), nblocks] ) - end function malloc_host_pinned_3d - - function malloc_host_pinned_4d(shape, nblocks) result(ptr) - integer(kind=jpim), intent(in) :: shape(3) - integer(kind=jpim), intent(in), optional :: nblocks - real(kind=jprb), pointer, contiguous :: ptr(:,:,:,:) - - integer(kind=jpim) :: istat, arrsize - type(c_ptr) :: hptr - - arrsize = shape(1) * shape(2) * shape(3) * nblocks * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" - call c_f_pointer(hptr, ptr, [shape(1), shape(2), shape(3), nblocks] ) - end function malloc_host_pinned_4d - - - SUBROUTINE FILL_BUFFER_2D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: IDX + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. - IDX = INDEX+1 - BUFFER(IDX:) = BUFFER(INDEX) - END SUBROUTINE FILL_BUFFER_2D + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - SUBROUTINE FILL_BUFFER_3D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: I, IDX + TYPE(GPU_STATS) :: STATS - IDX = INDEX+1 - DO I=1, SIZE(BUFFER, 2) - BUFFER(IDX:,I) = BUFFER(INDEX,I) - END DO - END SUBROUTINE FILL_BUFFER_3D - - SUBROUTINE FILL_BUFFER_4D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: I, J, IDX - - IDX = INDEX+1 - DO I=1, SIZE(BUFFER, 2) - DO J=1, SIZE(BUFFER, 3) - BUFFER(IDX:,I,J) = BUFFER(INDEX,I,J) - END DO - END DO - END SUBROUTINE FILL_BUFFER_4D +CONTAINS - SUBROUTINE FILL_BUFFER_INT2D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: BUFFER(:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: IDX + PROCEDURE :: FINAL => FIELD_INT3D_FINAL + PROCEDURE :: FIELD_INT3D_FINAL + PROCEDURE :: GET_VIEW => FIELD_INT3D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT3D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT3D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT3D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT3D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_INT3D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_INT3D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT3D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT3D_GET_HOST_DATA +END TYPE FIELD_INT3D + +TYPE, EXTENDS(FIELD_INT3D) :: FIELD_INT3D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_INT3D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_INT3D_WRAPPER_FINAL +END TYPE FIELD_INT3D_WRAPPER + +TYPE, EXTENDS(FIELD_INT3D) :: FIELD_INT3D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_INT3D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_INT3D_OWNER_FINAL +END TYPE FIELD_INT3D_OWNER + +TYPE, EXTENDS(FIELD_INT3D) :: FIELD_INT3D_WRAPPER_PACKED + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_INT3D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_INT3D_WRAPPER_PACKED_FINAL +END TYPE FIELD_INT3D_WRAPPER_PACKED - IDX = INDEX+1 - BUFFER(IDX:) = BUFFER(INDEX) - END SUBROUTINE FILL_BUFFER_INT2D +TYPE FIELD_INT3D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_INT3D), POINTER :: PTR => NULL() +END TYPE FIELD_INT3D_PTR - SUBROUTINE FILL_BUFFER_LOG2D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - LOGICAL, POINTER, INTENT(INOUT) :: BUFFER(:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: IDX +TYPE FIELD_INT3D_VIEW + ! Struct to hold array views, so we can make arrays of them + INTEGER(KIND=JPIM), POINTER :: P(:,:) => NULL() +END TYPE FIELD_INT3D_VIEW - IDX = INDEX+1 - BUFFER(IDX:) = BUFFER(INDEX) - END SUBROUTINE FILL_BUFFER_LOG2D +TYPE, ABSTRACT :: FIELD_INT4D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. - FUNCTION FIELD_2D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_2D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_2D_EMPTY + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA - FUNCTION FIELD_3D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_3D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(2) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1),SHAPE(2))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_3D_EMPTY + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:,:,:) => NULL() - FUNCTION FIELD_4D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_4D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(3) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1),SHAPE(2),SHAPE(3))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_4D_EMPTY + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:) - FUNCTION FIELD_INT2D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_INT2D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_INT2D_EMPTY + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. - FUNCTION FIELD_LOG2D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_LOG2D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_LOG2D_EMPTY + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - FUNCTION FIELD_2D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:) + TYPE(GPU_STATS) :: STATS - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_2D_WRAP +CONTAINS - FUNCTION FIELD_3D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) + PROCEDURE :: FINAL => FIELD_INT4D_FINAL + PROCEDURE :: FIELD_INT4D_FINAL + PROCEDURE :: GET_VIEW => FIELD_INT4D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT4D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT4D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT4D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT4D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_INT4D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_INT4D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT4D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT4D_GET_HOST_DATA +END TYPE FIELD_INT4D + +TYPE, EXTENDS(FIELD_INT4D) :: FIELD_INT4D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_INT4D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_INT4D_WRAPPER_FINAL +END TYPE FIELD_INT4D_WRAPPER + +TYPE, EXTENDS(FIELD_INT4D) :: FIELD_INT4D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_INT4D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_INT4D_OWNER_FINAL +END TYPE FIELD_INT4D_OWNER + +TYPE, EXTENDS(FIELD_INT4D) :: FIELD_INT4D_WRAPPER_PACKED + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_INT4D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_INT4D_WRAPPER_PACKED_FINAL +END TYPE FIELD_INT4D_WRAPPER_PACKED - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 3) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_3D_WRAP +TYPE FIELD_INT4D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_INT4D), POINTER :: PTR => NULL() +END TYPE FIELD_INT4D_PTR - FUNCTION FIELD_4D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) +TYPE FIELD_INT4D_VIEW + ! Struct to hold array views, so we can make arrays of them + INTEGER(KIND=JPIM), POINTER :: P(:,:,:) => NULL() +END TYPE FIELD_INT4D_VIEW - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 4) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_4D_WRAP +TYPE, ABSTRACT :: FIELD_INT5D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. - FUNCTION FIELD_INT2D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:) + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_INT2D_WRAP + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:,:,:,:) => NULL() - FUNCTION FIELD_LOG2D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_LOG2D), TARGET :: SELF - LOGICAL, TARGET, INTENT(IN) :: DATA(:,:) + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:,:) - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_LOG2D_WRAP - - FUNCTION FIELD_2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS + +CONTAINS + + PROCEDURE :: FINAL => FIELD_INT5D_FINAL + PROCEDURE :: FIELD_INT5D_FINAL + PROCEDURE :: GET_VIEW => FIELD_INT5D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT5D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT5D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT5D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT5D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_INT5D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_INT5D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT5D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT5D_GET_HOST_DATA +END TYPE FIELD_INT5D + +TYPE, EXTENDS(FIELD_INT5D) :: FIELD_INT5D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_INT5D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_INT5D_WRAPPER_FINAL +END TYPE FIELD_INT5D_WRAPPER + +TYPE, EXTENDS(FIELD_INT5D) :: FIELD_INT5D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_INT5D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_INT5D_OWNER_FINAL +END TYPE FIELD_INT5D_OWNER + +TYPE, EXTENDS(FIELD_INT5D) :: FIELD_INT5D_WRAPPER_PACKED + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_INT5D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_INT5D_WRAPPER_PACKED_FINAL +END TYPE FIELD_INT5D_WRAPPER_PACKED + +TYPE FIELD_INT5D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_INT5D), POINTER :: PTR => NULL() +END TYPE FIELD_INT5D_PTR + +TYPE FIELD_INT5D_VIEW + ! Struct to hold array views, so we can make arrays of them + INTEGER(KIND=JPIM), POINTER :: P(:,:,:,:) => NULL() +END TYPE FIELD_INT5D_VIEW + +TYPE, ABSTRACT :: FIELD_LOG2D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:) => NULL() + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:) + + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS + +CONTAINS + + PROCEDURE :: FINAL => FIELD_LOG2D_FINAL + PROCEDURE :: FIELD_LOG2D_FINAL + PROCEDURE :: GET_VIEW => FIELD_LOG2D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG2D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG2D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG2D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG2D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_LOG2D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_LOG2D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG2D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG2D_GET_HOST_DATA +END TYPE FIELD_LOG2D + +TYPE, EXTENDS(FIELD_LOG2D) :: FIELD_LOG2D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_LOG2D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_LOG2D_WRAPPER_FINAL +END TYPE FIELD_LOG2D_WRAPPER + +TYPE, EXTENDS(FIELD_LOG2D) :: FIELD_LOG2D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_LOG2D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_LOG2D_OWNER_FINAL +END TYPE FIELD_LOG2D_OWNER + +TYPE, EXTENDS(FIELD_LOG2D) :: FIELD_LOG2D_WRAPPER_PACKED + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_LOG2D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_LOG2D_WRAPPER_PACKED_FINAL +END TYPE FIELD_LOG2D_WRAPPER_PACKED + +TYPE FIELD_LOG2D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_LOG2D), POINTER :: PTR => NULL() +END TYPE FIELD_LOG2D_PTR + +TYPE FIELD_LOG2D_VIEW + ! Struct to hold array views, so we can make arrays of them + LOGICAL(KIND=JPLM), POINTER :: P(:) => NULL() +END TYPE FIELD_LOG2D_VIEW + +TYPE, ABSTRACT :: FIELD_LOG3D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:,:) => NULL() + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) + + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS + +CONTAINS + + PROCEDURE :: FINAL => FIELD_LOG3D_FINAL + PROCEDURE :: FIELD_LOG3D_FINAL + PROCEDURE :: GET_VIEW => FIELD_LOG3D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG3D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG3D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG3D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG3D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_LOG3D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_LOG3D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG3D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG3D_GET_HOST_DATA +END TYPE FIELD_LOG3D + +TYPE, EXTENDS(FIELD_LOG3D) :: FIELD_LOG3D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_LOG3D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_LOG3D_WRAPPER_FINAL +END TYPE FIELD_LOG3D_WRAPPER + +TYPE, EXTENDS(FIELD_LOG3D) :: FIELD_LOG3D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_LOG3D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_LOG3D_OWNER_FINAL +END TYPE FIELD_LOG3D_OWNER + +TYPE, EXTENDS(FIELD_LOG3D) :: FIELD_LOG3D_WRAPPER_PACKED + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_LOG3D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_LOG3D_WRAPPER_PACKED_FINAL +END TYPE FIELD_LOG3D_WRAPPER_PACKED + +TYPE FIELD_LOG3D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_LOG3D), POINTER :: PTR => NULL() +END TYPE FIELD_LOG3D_PTR + +TYPE FIELD_LOG3D_VIEW + ! Struct to hold array views, so we can make arrays of them + LOGICAL(KIND=JPLM), POINTER :: P(:,:) => NULL() +END TYPE FIELD_LOG3D_VIEW + +TYPE, ABSTRACT :: FIELD_LOG4D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:,:,:) => NULL() + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:) + + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS + +CONTAINS + + PROCEDURE :: FINAL => FIELD_LOG4D_FINAL + PROCEDURE :: FIELD_LOG4D_FINAL + PROCEDURE :: GET_VIEW => FIELD_LOG4D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG4D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG4D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG4D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG4D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_LOG4D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_LOG4D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG4D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG4D_GET_HOST_DATA +END TYPE FIELD_LOG4D + +TYPE, EXTENDS(FIELD_LOG4D) :: FIELD_LOG4D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_LOG4D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_LOG4D_WRAPPER_FINAL +END TYPE FIELD_LOG4D_WRAPPER + +TYPE, EXTENDS(FIELD_LOG4D) :: FIELD_LOG4D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_LOG4D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_LOG4D_OWNER_FINAL +END TYPE FIELD_LOG4D_OWNER + +TYPE, EXTENDS(FIELD_LOG4D) :: FIELD_LOG4D_WRAPPER_PACKED + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_LOG4D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_LOG4D_WRAPPER_PACKED_FINAL +END TYPE FIELD_LOG4D_WRAPPER_PACKED + +TYPE FIELD_LOG4D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_LOG4D), POINTER :: PTR => NULL() +END TYPE FIELD_LOG4D_PTR + +TYPE FIELD_LOG4D_VIEW + ! Struct to hold array views, so we can make arrays of them + LOGICAL(KIND=JPLM), POINTER :: P(:,:,:) => NULL() +END TYPE FIELD_LOG4D_VIEW + +TYPE, ABSTRACT :: FIELD_LOG5D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:,:) => NULL() + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:,:,:,:) => NULL() + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:,:) + + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + + INTEGER(KIND=JPIM) :: ISTATUS = 0 + INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 + + TYPE(GPU_STATS) :: STATS + +CONTAINS + + PROCEDURE :: FINAL => FIELD_LOG5D_FINAL + PROCEDURE :: FIELD_LOG5D_FINAL + PROCEDURE :: GET_VIEW => FIELD_LOG5D_GET_VIEW + PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG5D_GET_HOST_DATA_RDONLY + PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG5D_GET_HOST_DATA_RDWR + PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG5D_GET_DEVICE_DATA_RDONLY + PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG5D_GET_DEVICE_DATA_RDWR + PROCEDURE :: DELETE_DEVICE => FIELD_LOG5D_DELETE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_LOG5D_ENSURE_HOST + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST + + PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG5D_GET_DEVICE_DATA + PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG5D_GET_HOST_DATA +END TYPE FIELD_LOG5D + +TYPE, EXTENDS(FIELD_LOG5D) :: FIELD_LOG5D_WRAPPER +CONTAINS + PROCEDURE :: INIT => FIELD_LOG5D_WRAP + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST_WRAPPER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE_WRAPPER + PROCEDURE :: FINAL => FIELD_LOG5D_WRAPPER_FINAL +END TYPE FIELD_LOG5D_WRAPPER + +TYPE, EXTENDS(FIELD_LOG5D) :: FIELD_LOG5D_OWNER + TYPE(C_PTR) :: HPTR +CONTAINS + PROCEDURE :: INIT => FIELD_LOG5D_ALLOCATE + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST_OWNER + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE_OWNER + PROCEDURE :: FINAL => FIELD_LOG5D_OWNER_FINAL +END TYPE FIELD_LOG5D_OWNER + +TYPE, EXTENDS(FIELD_LOG5D) :: FIELD_LOG5D_WRAPPER_PACKED + LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX +CONTAINS + PROCEDURE :: INIT => FIELD_LOG5D_WRAP_PACKED + PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST_WRAPPER_PACKED + PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE_WRAPPER_PACKED + PROCEDURE :: FINAL => FIELD_LOG5D_WRAPPER_PACKED_FINAL +END TYPE FIELD_LOG5D_WRAPPER_PACKED + +TYPE FIELD_LOG5D_PTR + ! Struct to hold references to field objects + CLASS(FIELD_LOG5D), POINTER :: PTR => NULL() +END TYPE FIELD_LOG5D_PTR + +TYPE FIELD_LOG5D_VIEW + ! Struct to hold array views, so we can make arrays of them + LOGICAL(KIND=JPLM), POINTER :: P(:,:,:,:) => NULL() +END TYPE FIELD_LOG5D_VIEW + +CONTAINS +! +! CLASS METHODS +! + SUBROUTINE FIELD_2D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_2D_WRAPPER), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 2)) THEN + CALL ABOR1 ('FIELD_2D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_2D_WRAP + + SUBROUTINE FIELD_2D_UPDATE_DEVICE(SELF) + CLASS(FIELD_2D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_2D_UPDATE_DEVICE + + SUBROUTINE FIELD_2D_UPDATE_HOST(SELF) + CLASS(FIELD_2D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_2D_UPDATE_HOST + + SUBROUTINE FIELD_2D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_2D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA(:,IDX,:) + ELSE + SELF%PTR => DATA(:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_2D_WRAP_PACKED + + SUBROUTINE FIELD_2D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_2D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(2) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(2) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(2) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(2) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(2) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 2 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(2) = 1 + REAL_UBOUNDS(2) = UBOUNDS(2) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_2D_ALLOCATE + + FUNCTION FIELD_2D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_2D) :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):) => SELF%PTR(:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = 0.0_JPRB + END IF + END FUNCTION FIELD_2D_GET_VIEW + + SUBROUTINE FIELD_2D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_2D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_2D_DELETE_DEVICE + + SUBROUTINE FIELD_2D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_2D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_2D_FINAL + + SUBROUTINE FIELD_2D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_2D_WRAPPER) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_2D_FINAL + END SUBROUTINE FIELD_2D_WRAPPER_FINAL + + SUBROUTINE FIELD_2D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_2D_WRAPPER_PACKED) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_2D_FINAL + END SUBROUTINE FIELD_2D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_2D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_2D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_2D_FINAL + END SUBROUTINE FIELD_2D_OWNER_FINAL + + SUBROUTINE FIELD_2D_ENSURE_HOST(SELF) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_2D_ENSURE_HOST + + SUBROUTINE FIELD_2D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):) => SELF%PTR (:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_2D_GET_HOST_DATA + + SUBROUTINE FIELD_2D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_2D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_2D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_2D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_2D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_2D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_2D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):) => SELF%DEVPTR(:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_2D_GET_DEVICE_DATA + + SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_2D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_2D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_2D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_2D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%PTR(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_2D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%BASE_PTR(:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:, SELF%FIDX,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_2D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_2D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_2D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_2D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_2D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_2D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_3D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_3D_WRAPPER), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 3)) THEN + CALL ABOR1 ('FIELD_3D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_3D_WRAP + + SUBROUTINE FIELD_3D_UPDATE_DEVICE(SELF) + CLASS(FIELD_3D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_3D_UPDATE_DEVICE + + SUBROUTINE FIELD_3D_UPDATE_HOST(SELF) + CLASS(FIELD_3D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_3D_UPDATE_HOST + + SUBROUTINE FIELD_3D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_3D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA(:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_3D_WRAP_PACKED + + SUBROUTINE FIELD_3D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_3D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(3) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(3) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(3) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(3) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(3) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 3 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(3) = 1 + REAL_UBOUNDS(3) = UBOUNDS(3) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_3D_ALLOCATE + + FUNCTION FIELD_3D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_3D) :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):) => SELF%PTR(:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB + END IF + END FUNCTION FIELD_3D_GET_VIEW + + SUBROUTINE FIELD_3D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_3D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_3D_DELETE_DEVICE + + SUBROUTINE FIELD_3D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_3D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_3D_FINAL + + SUBROUTINE FIELD_3D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_3D_WRAPPER) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_3D_FINAL + END SUBROUTINE FIELD_3D_WRAPPER_FINAL + + SUBROUTINE FIELD_3D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_3D_WRAPPER_PACKED) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_3D_FINAL + END SUBROUTINE FIELD_3D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_3D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_3D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_3D_FINAL + END SUBROUTINE FIELD_3D_OWNER_FINAL + + SUBROUTINE FIELD_3D_ENSURE_HOST(SELF) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_3D_ENSURE_HOST + + SUBROUTINE FIELD_3D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%PTR (:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_3D_GET_HOST_DATA + + SUBROUTINE FIELD_3D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_3D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_3D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_3D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_3D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_3D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_3D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%DEVPTR(:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_3D_GET_DEVICE_DATA + + SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_3D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_3D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_3D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_3D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%PTR(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_3D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%BASE_PTR(:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_3D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_3D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_3D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_3D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_3D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_3D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_4D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_4D_WRAPPER), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 4)) THEN + CALL ABOR1 ('FIELD_4D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_4D_WRAP + + SUBROUTINE FIELD_4D_UPDATE_DEVICE(SELF) + CLASS(FIELD_4D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_4D_UPDATE_DEVICE + + SUBROUTINE FIELD_4D_UPDATE_HOST(SELF) + CLASS(FIELD_4D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_4D_UPDATE_HOST + + SUBROUTINE FIELD_4D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_4D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA(:,:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_4D_WRAP_PACKED + + SUBROUTINE FIELD_4D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_4D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(4) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(4) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(4) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(4) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(4) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 4 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(4) = 1 + REAL_UBOUNDS(4) = UBOUNDS(4) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_4D_ALLOCATE + + FUNCTION FIELD_4D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_4D) :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):) => SELF%PTR(:,:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB + END IF + END FUNCTION FIELD_4D_GET_VIEW + + SUBROUTINE FIELD_4D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_4D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_4D_DELETE_DEVICE + + SUBROUTINE FIELD_4D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_4D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_4D_FINAL + + SUBROUTINE FIELD_4D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_4D_WRAPPER) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_4D_FINAL + END SUBROUTINE FIELD_4D_WRAPPER_FINAL + + SUBROUTINE FIELD_4D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_4D_WRAPPER_PACKED) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_4D_FINAL + END SUBROUTINE FIELD_4D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_4D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_4D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_4D_FINAL + END SUBROUTINE FIELD_4D_OWNER_FINAL + + SUBROUTINE FIELD_4D_ENSURE_HOST(SELF) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_4D_ENSURE_HOST + + SUBROUTINE FIELD_4D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%PTR (:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_4D_GET_HOST_DATA + + SUBROUTINE FIELD_4D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_4D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_4D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_4D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_4D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_4D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_4D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%DEVPTR(:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_4D_GET_DEVICE_DATA + + SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_4D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_4D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_4D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_4D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%PTR(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_4D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_4D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_4D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_4D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_4D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_4D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_4D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_5D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_5D_WRAPPER), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 5)) THEN + CALL ABOR1 ('FIELD_5D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_5D_WRAP + + SUBROUTINE FIELD_5D_UPDATE_DEVICE(SELF) + CLASS(FIELD_5D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_5D_UPDATE_DEVICE + + SUBROUTINE FIELD_5D_UPDATE_HOST(SELF) + CLASS(FIELD_5D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_5D_UPDATE_HOST + + SUBROUTINE FIELD_5D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_5D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA(:,:,:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_5D_WRAP_PACKED + + SUBROUTINE FIELD_5D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_5D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(5) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(5) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(5) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(5) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(5) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 5 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(5) = 1 + REAL_UBOUNDS(5) = UBOUNDS(5) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * (REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1) * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1,REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):,REAL_LBOUNDS(5):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_5D_ALLOCATE + + FUNCTION FIELD_5D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_5D) :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(5) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):,LBOUNDS(4):) => SELF%PTR(:,:,:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:,:) = 0.0_JPRB + END IF + END FUNCTION FIELD_5D_GET_VIEW + + SUBROUTINE FIELD_5D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_5D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_5D_DELETE_DEVICE + + SUBROUTINE FIELD_5D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_5D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_5D_FINAL + + SUBROUTINE FIELD_5D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_5D_WRAPPER) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_5D_FINAL + END SUBROUTINE FIELD_5D_WRAPPER_FINAL + + SUBROUTINE FIELD_5D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_5D_WRAPPER_PACKED) :: SELF + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_5D_FINAL + END SUBROUTINE FIELD_5D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_5D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_5D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_5D_FINAL + END SUBROUTINE FIELD_5D_OWNER_FINAL + + SUBROUTINE FIELD_5D_ENSURE_HOST(SELF) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_5D_ENSURE_HOST + + SUBROUTINE FIELD_5D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(5) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%PTR (:,:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_5D_GET_HOST_DATA + + SUBROUTINE FIELD_5D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_5D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_5D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_5D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_5D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_5D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_5D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(5) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%DEVPTR(:,:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_5D_GET_DEVICE_DATA + + SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_5D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_5D), INTENT (INOUT) :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_5D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_5D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%PTR(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_5D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,:,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_5D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_5D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_5D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_5D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_5D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_5D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_INT2D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT2D_WRAPPER), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 2)) THEN + CALL ABOR1 ('FIELD_INT2D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_INT2D_WRAP + + SUBROUTINE FIELD_INT2D_UPDATE_DEVICE(SELF) + CLASS(FIELD_INT2D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE + + SUBROUTINE FIELD_INT2D_UPDATE_HOST(SELF) + CLASS(FIELD_INT2D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT2D_UPDATE_HOST + + SUBROUTINE FIELD_INT2D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT2D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA(:,IDX,:) + ELSE + SELF%PTR => DATA(:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_INT2D_WRAP_PACKED + + SUBROUTINE FIELD_INT2D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_INT2D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(2) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(2) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(2) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(2) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(2) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 2 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(2) = 1 + REAL_UBOUNDS(2) = UBOUNDS(2) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * SIZEOF(1_JPIM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_INT2D_ALLOCATE + + FUNCTION FIELD_INT2D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_INT2D) :: SELF + INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):) => SELF%PTR(:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = 0.0_JPIM + END IF + END FUNCTION FIELD_INT2D_GET_VIEW + + SUBROUTINE FIELD_INT2D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_INT2D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_INT2D_DELETE_DEVICE + + SUBROUTINE FIELD_INT2D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT2D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_INT2D_FINAL + + SUBROUTINE FIELD_INT2D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT2D_WRAPPER) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT2D_FINAL + END SUBROUTINE FIELD_INT2D_WRAPPER_FINAL + + SUBROUTINE FIELD_INT2D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT2D_WRAPPER_PACKED) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT2D_FINAL + END SUBROUTINE FIELD_INT2D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_INT2D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT2D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_INT2D_FINAL + END SUBROUTINE FIELD_INT2D_OWNER_FINAL + + SUBROUTINE FIELD_INT2D_ENSURE_HOST(SELF) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_INT2D_ENSURE_HOST + + SUBROUTINE FIELD_INT2D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):) => SELF%PTR (:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT2D_GET_HOST_DATA + + SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_INT2D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_INT2D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):) => SELF%DEVPTR(:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA + + SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_INT2D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_INT2D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_INT2D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%PTR(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_INT2D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%BASE_PTR(:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:, SELF%FIDX,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_INT2D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_INT2D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_INT2D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT2D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_INT3D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT3D_WRAPPER), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 3)) THEN + CALL ABOR1 ('FIELD_INT3D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_INT3D_WRAP + + SUBROUTINE FIELD_INT3D_UPDATE_DEVICE(SELF) + CLASS(FIELD_INT3D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE + + SUBROUTINE FIELD_INT3D_UPDATE_HOST(SELF) + CLASS(FIELD_INT3D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT3D_UPDATE_HOST + + SUBROUTINE FIELD_INT3D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT3D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA(:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_INT3D_WRAP_PACKED + + SUBROUTINE FIELD_INT3D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_INT3D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(3) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(3) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(3) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(3) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(3) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 3 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(3) = 1 + REAL_UBOUNDS(3) = UBOUNDS(3) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & SIZEOF(1_JPIM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_INT3D_ALLOCATE + + FUNCTION FIELD_INT3D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_INT3D) :: SELF + INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):) => SELF%PTR(:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:) = 0.0_JPIM + END IF + END FUNCTION FIELD_INT3D_GET_VIEW + + SUBROUTINE FIELD_INT3D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_INT3D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_INT3D_DELETE_DEVICE + + SUBROUTINE FIELD_INT3D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT3D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_INT3D_FINAL + + SUBROUTINE FIELD_INT3D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT3D_WRAPPER) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT3D_FINAL + END SUBROUTINE FIELD_INT3D_WRAPPER_FINAL + + SUBROUTINE FIELD_INT3D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT3D_WRAPPER_PACKED) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT3D_FINAL + END SUBROUTINE FIELD_INT3D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_INT3D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT3D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_INT3D_FINAL + END SUBROUTINE FIELD_INT3D_OWNER_FINAL + + SUBROUTINE FIELD_INT3D_ENSURE_HOST(SELF) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_INT3D_ENSURE_HOST + + SUBROUTINE FIELD_INT3D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%PTR (:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT3D_GET_HOST_DATA + + SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_INT3D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_INT3D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%DEVPTR(:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA + + SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_INT3D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_INT3D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_INT3D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%PTR(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_INT3D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%BASE_PTR(:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_INT3D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_INT3D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_INT3D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT3D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_INT4D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT4D_WRAPPER), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 4)) THEN + CALL ABOR1 ('FIELD_INT4D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_INT4D_WRAP + + SUBROUTINE FIELD_INT4D_UPDATE_DEVICE(SELF) + CLASS(FIELD_INT4D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE + + SUBROUTINE FIELD_INT4D_UPDATE_HOST(SELF) + CLASS(FIELD_INT4D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT4D_UPDATE_HOST + + SUBROUTINE FIELD_INT4D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT4D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA(:,:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_INT4D_WRAP_PACKED + + SUBROUTINE FIELD_INT4D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_INT4D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(4) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(4) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(4) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(4) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(4) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 4 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(4) = 1 + REAL_UBOUNDS(4) = UBOUNDS(4) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * SIZEOF(1_JPIM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_INT4D_ALLOCATE + + FUNCTION FIELD_INT4D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_INT4D) :: SELF + INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):) => SELF%PTR(:,:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPIM + END IF + END FUNCTION FIELD_INT4D_GET_VIEW + + SUBROUTINE FIELD_INT4D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_INT4D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_INT4D_DELETE_DEVICE + + SUBROUTINE FIELD_INT4D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT4D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_INT4D_FINAL + + SUBROUTINE FIELD_INT4D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT4D_WRAPPER) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT4D_FINAL + END SUBROUTINE FIELD_INT4D_WRAPPER_FINAL + + SUBROUTINE FIELD_INT4D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT4D_WRAPPER_PACKED) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT4D_FINAL + END SUBROUTINE FIELD_INT4D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_INT4D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT4D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_INT4D_FINAL + END SUBROUTINE FIELD_INT4D_OWNER_FINAL + + SUBROUTINE FIELD_INT4D_ENSURE_HOST(SELF) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_INT4D_ENSURE_HOST + + SUBROUTINE FIELD_INT4D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%PTR (:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT4D_GET_HOST_DATA + + SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_INT4D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_INT4D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%DEVPTR(:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA + + SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_INT4D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_INT4D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_INT4D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%PTR(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_INT4D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_INT4D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_INT4D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_INT4D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT4D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_INT5D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT5D_WRAPPER), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 5)) THEN + CALL ABOR1 ('FIELD_INT5D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_INT5D_WRAP + + SUBROUTINE FIELD_INT5D_UPDATE_DEVICE(SELF) + CLASS(FIELD_INT5D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE + + SUBROUTINE FIELD_INT5D_UPDATE_HOST(SELF) + CLASS(FIELD_INT5D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_INT5D_UPDATE_HOST + + SUBROUTINE FIELD_INT5D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_INT5D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA(:,:,:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_INT5D_WRAP_PACKED + + SUBROUTINE FIELD_INT5D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_INT5D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(5) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(5) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(5) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(5) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(5) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 5 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(5) = 1 + REAL_UBOUNDS(5) = UBOUNDS(5) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * (REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1) * SIZEOF(1_JPIM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1,REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):,REAL_LBOUNDS(5):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_INT5D_ALLOCATE + + FUNCTION FIELD_INT5D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_INT5D) :: SELF + INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(5) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):,LBOUNDS(4):) => SELF%PTR(:,:,:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:,:) = 0.0_JPIM + END IF + END FUNCTION FIELD_INT5D_GET_VIEW + + SUBROUTINE FIELD_INT5D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_INT5D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_INT5D_DELETE_DEVICE + + SUBROUTINE FIELD_INT5D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT5D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_INT5D_FINAL + + SUBROUTINE FIELD_INT5D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT5D_WRAPPER) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT5D_FINAL + END SUBROUTINE FIELD_INT5D_WRAPPER_FINAL + + SUBROUTINE FIELD_INT5D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT5D_WRAPPER_PACKED) :: SELF + INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_INT5D_FINAL + END SUBROUTINE FIELD_INT5D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_INT5D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_INT5D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_INT5D_FINAL + END SUBROUTINE FIELD_INT5D_OWNER_FINAL + + SUBROUTINE FIELD_INT5D_ENSURE_HOST(SELF) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_INT5D_ENSURE_HOST + + SUBROUTINE FIELD_INT5D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(5) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%PTR (:,:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT5D_GET_HOST_DATA + + SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_INT5D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_INT5D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(5) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%DEVPTR(:,:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA + + SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_INT5D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + + CALL FIELD_INT5D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_INT5D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%PTR(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_INT5D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,:,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_INT5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_INT5D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_INT5D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_INT5D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_INT5D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_LOG2D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG2D_WRAPPER), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 2)) THEN + CALL ABOR1 ('FIELD_LOG2D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_LOG2D_WRAP + + SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE(SELF) + CLASS(FIELD_LOG2D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE + + SUBROUTINE FIELD_LOG2D_UPDATE_HOST(SELF) + CLASS(FIELD_LOG2D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG2D_UPDATE_HOST + + SUBROUTINE FIELD_LOG2D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG2D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA(:,IDX,:) + ELSE + SELF%PTR => DATA(:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_LOG2D_WRAP_PACKED + + SUBROUTINE FIELD_LOG2D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_LOG2D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(2) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(2) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(2) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(2) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(2) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 2 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(2) = 1 + REAL_UBOUNDS(2) = UBOUNDS(2) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * SIZEOF(.TRUE._JPLM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_LOG2D_ALLOCATE + + FUNCTION FIELD_LOG2D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_LOG2D) :: SELF + LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):) => SELF%PTR(:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = .FALSE. + END IF + END FUNCTION FIELD_LOG2D_GET_VIEW + + SUBROUTINE FIELD_LOG2D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_LOG2D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_LOG2D_DELETE_DEVICE + + SUBROUTINE FIELD_LOG2D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG2D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_LOG2D_FINAL + + SUBROUTINE FIELD_LOG2D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG2D_WRAPPER) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG2D_FINAL + END SUBROUTINE FIELD_LOG2D_WRAPPER_FINAL + + SUBROUTINE FIELD_LOG2D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG2D_WRAPPER_PACKED) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG2D_FINAL + END SUBROUTINE FIELD_LOG2D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_LOG2D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG2D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_LOG2D_FINAL + END SUBROUTINE FIELD_LOG2D_OWNER_FINAL + + SUBROUTINE FIELD_LOG2D_ENSURE_HOST(SELF) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_LOG2D_ENSURE_HOST + + SUBROUTINE FIELD_LOG2D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):) => SELF%PTR (:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_LOG2D_GET_HOST_DATA + + SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_LOG2D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_LOG2D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(2) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):) => SELF%DEVPTR(:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA + + SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_LOG2D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) + + CALL FIELD_LOG2D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_LOG2D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%PTR(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_LOG2D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%BASE_PTR(:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:, SELF%FIDX,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_LOG2D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_LOG2D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_LOG2D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG2D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_LOG3D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG3D_WRAPPER), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 3)) THEN + CALL ABOR1 ('FIELD_LOG3D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_LOG3D_WRAP + + SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE(SELF) + CLASS(FIELD_LOG3D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE + + SUBROUTINE FIELD_LOG3D_UPDATE_HOST(SELF) + CLASS(FIELD_LOG3D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG3D_UPDATE_HOST + + SUBROUTINE FIELD_LOG3D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG3D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA(:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_LOG3D_WRAP_PACKED + + SUBROUTINE FIELD_LOG3D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_LOG3D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(3) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(3) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(3) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(3) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(3) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 3 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(3) = 1 + REAL_UBOUNDS(3) = UBOUNDS(3) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & SIZEOF(.TRUE._JPLM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_LOG3D_ALLOCATE + + FUNCTION FIELD_LOG3D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_LOG3D) :: SELF + LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):) => SELF%PTR(:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:) = .FALSE. + END IF + END FUNCTION FIELD_LOG3D_GET_VIEW + + SUBROUTINE FIELD_LOG3D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_LOG3D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_LOG3D_DELETE_DEVICE + + SUBROUTINE FIELD_LOG3D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG3D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_LOG3D_FINAL + + SUBROUTINE FIELD_LOG3D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG3D_WRAPPER) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG3D_FINAL + END SUBROUTINE FIELD_LOG3D_WRAPPER_FINAL + + SUBROUTINE FIELD_LOG3D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG3D_WRAPPER_PACKED) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG3D_FINAL + END SUBROUTINE FIELD_LOG3D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_LOG3D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG3D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_LOG3D_FINAL + END SUBROUTINE FIELD_LOG3D_OWNER_FINAL + + SUBROUTINE FIELD_LOG3D_ENSURE_HOST(SELF) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_LOG3D_ENSURE_HOST + + SUBROUTINE FIELD_LOG3D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%PTR (:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF + + END SUBROUTINE FIELD_LOG3D_GET_HOST_DATA + + SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_LOG3D_GET_HOST_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDONLY + + SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_LOG3D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDWR + + SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(3) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%DEVPTR(:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA + + SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_LOG3D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) + + CALL FIELD_LOG3D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_LOG3D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%PTR(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_LOG3D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%BASE_PTR(:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_LOG3D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_LOG3D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_LOG3D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG3D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_LOG4D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG4D_WRAPPER), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 4)) THEN + CALL ABOR1 ('FIELD_LOG4D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_LOG4D_WRAP + + SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE(SELF) + CLASS(FIELD_LOG4D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE + + SUBROUTINE FIELD_LOG4D_UPDATE_HOST(SELF) + CLASS(FIELD_LOG4D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG4D_UPDATE_HOST + + SUBROUTINE FIELD_LOG4D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG4D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) + LOGICAL :: LLPERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA(:,:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,:,IDX,:) + ENDIF + SELF%THREAD_BUFFER = .FALSE. + SELF%ISTATUS = NHSTFRESH + + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + END SUBROUTINE FIELD_LOG4D_WRAP_PACKED + + SUBROUTINE FIELD_LOG4D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) + ! Create FIELD object by explicitly allocating new data + CLASS(FIELD_LOG4D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(4) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(4) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(4) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(4) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(4) = OML_MAX_THREADS() + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + + SELF%LAST_CONTIGUOUS_DIMENSION = 4 + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + SELF%THREAD_BUFFER = .FALSE. + REAL_LBOUNDS(4) = 1 + REAL_UBOUNDS(4) = UBOUNDS(4) + END IF + END IF + + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * SIZEOF(.TRUE._JPLM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):) => SELF%DATA + + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_LOG4D_ALLOCATE + + FUNCTION FIELD_LOG4D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_LOG4D) :: SELF + LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):) => SELF%PTR(:,:,:,IDX) + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:) = .FALSE. + END IF + END FUNCTION FIELD_LOG4D_GET_VIEW + + SUBROUTINE FIELD_LOG4D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_LOG4D) :: SELF + + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_LOG4D_DELETE_DEVICE + + SUBROUTINE FIELD_LOG4D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG4D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_LOG4D_FINAL + + SUBROUTINE FIELD_LOG4D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG4D_WRAPPER) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG4D_FINAL + END SUBROUTINE FIELD_LOG4D_WRAPPER_FINAL + + SUBROUTINE FIELD_LOG4D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG4D_WRAPPER_PACKED) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG4D_FINAL + END SUBROUTINE FIELD_LOG4D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_LOG4D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG4D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_LOG4D_FINAL + END SUBROUTINE FIELD_LOG4D_OWNER_FINAL + + SUBROUTINE FIELD_LOG4D_ENSURE_HOST(SELF) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + + END SUBROUTINE FIELD_LOG4D_ENSURE_HOST - SELF%PTR => DATA(:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_2D_WRAP_PACKED + SUBROUTINE FIELD_LOG4D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(4) - FUNCTION FIELD_3D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%PTR (:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF - integer(kind=jpim) :: arrsize, istat - type(c_ptr) :: hptr + END SUBROUTINE FIELD_LOG4D_GET_HOST_DATA - SELF%PTR => DATA(:,:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 3) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX + SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - ! arrsize = SIZE(SELF%PTR, 1) * SIZE(SELF%PTR, 2) * SELF%NBLOCKS * sizeof(1.0_JPRB) - ! istat = cudaSetDeviceFlags(cudadevicemaphost) - ! istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - ! call c_f_pointer(hptr, self%data, [SIZE(SELF%PTR, 1), SIZE(SELF%PTR, 2), SELF%NBLOCKS] ) + CALL FIELD_LOG4D_GET_HOST_DATA (SELF, NRD, PTR) - END FUNCTION FIELD_3D_WRAP_PACKED + END SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDONLY - FUNCTION FIELD_4D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX + SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - SELF%PTR => DATA(:,:,:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 4) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_4D_WRAP_PACKED + CALL FIELD_LOG4D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - FUNCTION FIELD_INT2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX + END SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDWR - SELF%PTR => DATA(:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_INT2D_WRAP_PACKED + SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(4) + + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%DEVPTR(:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF + + END SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA + + SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_LOG4D_GET_DEVICE_DATA (SELF, NRD, PTR) + + END SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDONLY + + SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) + + CALL FIELD_LOG4D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) + + END SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDWR + + + SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_LOG4D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%PTR(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER + + SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_LOG4D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER + + SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) + + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER_PACKED + + SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER_PACKED + + SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_LOG4D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - FUNCTION FIELD_LOG2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_LOG2D), TARGET :: SELF - LOGICAL, TARGET, INTENT(IN) :: DATA(:,:,:) + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_LOG4D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_LOG4D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG4D_UPDATE_HOST_OWNER + + SUBROUTINE FIELD_LOG5D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG5D_WRAPPER), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) + LOGICAL :: LLPERSISTENT + + LLPERSISTENT = .TRUE. + IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT + + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA + ELSE + SELF%PTR => DATA + ENDIF + SELF%THREAD_BUFFER = .NOT. LLPERSISTENT + SELF%ISTATUS = NHSTFRESH + + IF (.NOT. LLPERSISTENT) THEN + IF (OML_MAX_THREADS() /= SIZE (DATA, 5)) THEN + CALL ABOR1 ('FIELD_LOG5D_WRAP: DIMENSION MISMATCH') + ENDIF + ENDIF + + END SUBROUTINE FIELD_LOG5D_WRAP + + SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE(SELF) + CLASS(FIELD_LOG5D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE + + SUBROUTINE FIELD_LOG5D_UPDATE_HOST(SELF) + CLASS(FIELD_LOG5D), INTENT(INOUT) :: SELF + + PRINT *, "Should never arrive here" + ERROR STOP + + END SUBROUTINE FIELD_LOG5D_UPDATE_HOST + + SUBROUTINE FIELD_LOG5D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) + ! Create FIELD object by wrapping existing data + CLASS(FIELD_LOG5D_WRAPPER_PACKED), INTENT(INOUT) :: SELF + LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:,:) INTEGER(KIND=JPIM), INTENT(IN) :: IDX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) + LOGICAL :: LLPERSISTENT - SELF%PTR => DATA(:,IDX,:) - SELF%ACTIVE = .TRUE. + IF (PRESENT(LBOUNDS)) THEN + SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA(:,:,:,:,IDX,:) + ELSE + SELF%PTR => DATA(:,:,:,:,IDX,:) + ENDIF SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%ISTATUS = NHSTFRESH + SELF%BASE_PTR => DATA SELF%FIDX = IDX - END FUNCTION FIELD_LOG2D_WRAP_PACKED - FUNCTION FIELD_2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) + END SUBROUTINE FIELD_LOG5D_WRAP_PACKED + + SUBROUTINE FIELD_LOG5D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + CLASS(FIELD_LOG5D_OWNER) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(5) + INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(5) LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK + INTEGER(KIND=JPIM) :: REAL_LBOUNDS(5) + INTEGER(KIND=JPIM) :: REAL_UBOUNDS(5) + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr + REAL_LBOUNDS=LBOUNDS + REAL_UBOUNDS=UBOUNDS + REAL_UBOUNDS(5) = OML_MAX_THREADS() ! By default we allocate thread-local temporaries SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() + + SELF%LAST_CONTIGUOUS_DIMENSION = 5 IF (PRESENT(PERSISTENT)) THEN IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_2D_ALLOCATE : NBLOCKS not given for persistent allocation!') SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS + REAL_LBOUNDS(5) = 1 + REAL_UBOUNDS(5) = UBOUNDS(5) END IF END IF - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) + ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& + & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * (REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1) * SIZEOF(.TRUE._JPLM) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& + &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1,REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1]) + SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):,REAL_LBOUNDS(5):) => SELF%DATA - arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) + SELF%ISTATUS = NHSTFRESH + END SUBROUTINE FIELD_LOG5D_ALLOCATE - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_2D_ALLOCATE - - FUNCTION FIELD_3D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK + FUNCTION FIELD_LOG5D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) + CLASS(FIELD_LOG5D) :: SELF + LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + INTEGER(KIND=JPIM) :: LBOUNDS(5) - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() + LBOUNDS=LBOUND(SELF%PTR) + VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):,LBOUNDS(4):) => SELF%PTR(:,:,:,:,IDX) - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_3D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:,:) = .FALSE. END IF + END FUNCTION FIELD_LOG5D_GET_VIEW - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),SHAPE(2),NBLK)) + SUBROUTINE FIELD_LOG5D_DELETE_DEVICE(SELF) + ! Delete the copy of this field on GPU device + CLASS(FIELD_LOG5D) :: SELF - arrsize = SHAPE(1) * SHAPE(2) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), SHAPE(2), NBLK] ) + IF (ASSOCIATED (SELF%DEVPTR)) THEN + DEALLOCATE (SELF%DEVDATA) + NULLIFY(SELF%DEVPTR) + ENDIF + END SUBROUTINE FIELD_LOG5D_DELETE_DEVICE - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 3) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_3D_ALLOCATE + SUBROUTINE FIELD_LOG5D_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG5D) :: SELF + CALL SELF%DELETE_DEVICE() + NULLIFY(SELF%PTR) + END SUBROUTINE FIELD_LOG5D_FINAL + + SUBROUTINE FIELD_LOG5D_WRAPPER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG5D_WRAPPER) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG5D_FINAL + END SUBROUTINE FIELD_LOG5D_WRAPPER_FINAL + + SUBROUTINE FIELD_LOG5D_WRAPPER_PACKED_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG5D_WRAPPER_PACKED) :: SELF + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:,:) + CALL SELF%GET_HOST_DATA_RDONLY(PTR) + CALL SELF%FIELD_LOG5D_FINAL + END SUBROUTINE FIELD_LOG5D_WRAPPER_PACKED_FINAL + + SUBROUTINE FIELD_LOG5D_OWNER_FINAL(SELF) + ! Finalizes field and deallocates owned data + CLASS(FIELD_LOG5D_OWNER) :: SELF + INTEGER(KIND=JPIM) :: ISTAT + + ISTAT = CUDAFREEHOST(SELF%HPTR) + NULLIFY(SELF%DATA) + CALL SELF%FIELD_LOG5D_FINAL + END SUBROUTINE FIELD_LOG5D_OWNER_FINAL + + SUBROUTINE FIELD_LOG5D_ENSURE_HOST(SELF) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF - FUNCTION FIELD_4D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_4D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK + END SUBROUTINE FIELD_LOG5D_ENSURE_HOST - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr + SUBROUTINE FIELD_LOG5D_GET_HOST_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(5) - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN + CALL SELF%UPDATE_HOST() + SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) + ENDIF + PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%PTR (:,:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) + ENDIF - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_4D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF + END SUBROUTINE FIELD_LOG5D_GET_HOST_DATA - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),SHAPE(2),SHAPE(3),NBLK)) + SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - arrsize = SHAPE(1) * SHAPE(2) * SHAPE(3) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), SHAPE(2), SHAPE(3), NBLK] ) + CALL FIELD_LOG5D_GET_HOST_DATA (SELF, NRD, PTR) - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 4) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_4D_ALLOCATE + END SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDONLY - FUNCTION FIELD_INT2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK + SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr + CALL FIELD_LOG5D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() + END SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDWR - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_INT2D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF + SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA (SELF, MODE, PTR) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + INTEGER (KIND=JPIM), INTENT (IN) :: MODE + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) + INTEGER(KIND=JPIM) :: LBOUNDS(5) - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) + LBOUNDS=LBOUND(SELF%PTR) + IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN + IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + ENDIF + CALL SELF%UPDATE_DEVICE() + SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) + ENDIF + PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%DEVPTR(:,:,:,:,:) + IF (IAND (MODE, NWR) /= 0) THEN + SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) + ENDIF - arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) + END SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_INT2D_ALLOCATE + SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDONLY (SELF, PTR) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - FUNCTION FIELD_LOG2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_LOG2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK + CALL FIELD_LOG5D_GET_DEVICE_DATA (SELF, NRD, PTR) - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr + END SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDONLY - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() + SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDWR (SELF, PTR) + CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF + LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_LOG2D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF + CALL FIELD_LOG5D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) - - arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) - - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_LOG2D_ALLOCATE - - FUNCTION FIELD_2D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_2D) :: SELF - TYPE(FIELD_2D), POINTER :: NEWOBJ + END SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDWR - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_2D_CLONE - - FUNCTION FIELD_3D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_3D) :: SELF - TYPE(FIELD_3D), POINTER :: NEWOBJ - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_3D_CLONE - - FUNCTION FIELD_4D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_4D) :: SELF - TYPE(FIELD_4D), POINTER :: NEWOBJ + SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER(SELF) + CLASS(FIELD_LOG5D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_4D_CLONE - - FUNCTION FIELD_INT2D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_INT2D) :: SELF - TYPE(FIELD_INT2D), POINTER :: NEWOBJ + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_INT2D_CLONE - - FUNCTION FIELD_LOG2D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_LOG2D) :: SELF - TYPE(FIELD_LOG2D), POINTER :: NEWOBJ + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_LOG2D_CLONE + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%PTR(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - SUBROUTINE FIELD_2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,IDX) - END IF + END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF + SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER(SELF) + CLASS(FIELD_LOG5D_WRAPPER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_2D_UPDATE_VIEW + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS - SUBROUTINE FIELD_3D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,:,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,:,IDX) - END IF + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF + END SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_3D_UPDATE_VIEW + SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH - SUBROUTINE FIELD_4D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_4D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,:,:,IDX) - END IF + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:,:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_4D_UPDATE_VIEW + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - SUBROUTINE FIELD_INT2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,IDX) - END IF + END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER_PACKED - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF + SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER_PACKED(SELF) + CLASS(FIELD_LOG5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:) = 0.0_JPIM - END IF - END SUBROUTINE FIELD_INT2D_UPDATE_VIEW + NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + BLKSIZE = ARRSIZE/NBLOCKS + CALL CPU_TIME(START) + DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) + END DO + CALL CPU_TIME(FINISH) - SUBROUTINE FIELD_LOG2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_LOG2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + CALL ACC_UNMAP_DATA(SELF%DATA) + DEALLOCATE(SELF%DATA) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,IDX) - END IF + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF + END SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER_PACKED - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:) = .FALSE. - END IF - END SUBROUTINE FIELD_LOG2D_UPDATE_VIEW + SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_OWNER(SELF) + CLASS(FIELD_LOG5D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH - SUBROUTINE FIELD_2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF + CALL CPU_TIME(START) + CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) + CALL CPU_TIME(FINISH) - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_2D_EXTRACT_VIEW + HPTR = ACC_HOSTPTR(SELF%DEVDATA) + CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - SUBROUTINE FIELD_3D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_OWNER + + SUBROUTINE FIELD_LOG5D_UPDATE_HOST_OWNER(SELF) + CLASS(FIELD_LOG5D_OWNER), INTENT (INOUT) :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + TYPE(C_PTR) :: HPTR + REAL :: START, FINISH + + ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) + + CALL CPU_TIME(START) + CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) + CALL CPU_TIME(FINISH) + + CALL ACC_UNMAP_DATA(SELF%DATA) + + CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) + + END SUBROUTINE FIELD_LOG5D_UPDATE_HOST_OWNER + + + FUNCTION MALLOC_HOST_PINNED_2D(SHAPE, NBLOCKS) RESULT(PTR) + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:) + + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + TYPE(C_PTR) :: HPTR + + ARRSIZE = SHAPE(1) * NBLOCKS * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), NBLOCKS] ) + + END FUNCTION + + FUNCTION MALLOC_HOST_PINNED_3D(SHAPE, NBLOCKS) RESULT(PTR) + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:,:) + + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + TYPE(C_PTR) :: HPTR + + ARRSIZE = SHAPE(1) * SHAPE(2) * NBLOCKS * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), SHAPE(2), NBLOCKS] ) + + END FUNCTION + + FUNCTION MALLOC_HOST_PINNED_4D(SHAPE, NBLOCKS) RESULT(PTR) + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:,:,:) + + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + TYPE(C_PTR) :: HPTR - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF + ARRSIZE = SHAPE(1) * SHAPE(2) * SHAPE(3) * NBLOCKS * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), SHAPE(2), SHAPE(3), NBLOCKS] ) - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_3D_EXTRACT_VIEW + END FUNCTION - SUBROUTINE FIELD_4D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + FUNCTION MALLOC_HOST_PINNED_5D(SHAPE, NBLOCKS) RESULT(PTR) + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(4) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:,:,:,:) - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE + TYPE(C_PTR) :: HPTR - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF + ARRSIZE = SHAPE(1) * SHAPE(2) * SHAPE(3) * SHAPE(4) * NBLOCKS * SIZEOF(1.0_JPRB) + ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) + ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) + CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), SHAPE(2), SHAPE(3), SHAPE(4), NBLOCKS] ) - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_4D_EXTRACT_VIEW + END FUNCTION - SUBROUTINE FIELD_INT2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + SUBROUTINE INC_CPU_TO_GPU_TRANSFER(SELF, START, FINISH) + CLASS(GPU_STATS), INTENT(INOUT) :: SELF + REAL, INTENT(IN) :: START, FINISH + SELF%TRANSFER_CPU_TO_GPU = SELF%TRANSFER_CPU_TO_GPU + 1 + SELF%TOTAL_TIME_TRANSFER_CPU_TO_GPU = SELF%TOTAL_TIME_TRANSFER_CPU_TO_GPU + FINISH - START + END SUBROUTINE - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF + SUBROUTINE INC_GPU_TO_CPU_TRANSFER(SELF, START, FINISH) + CLASS(GPU_STATS), INTENT(INOUT) :: SELF + REAL, INTENT(IN) :: START, FINISH + SELF%TRANSFER_GPU_TO_CPU = SELF%TRANSFER_GPU_TO_CPU + 1 + SELF%TOTAL_TIME_TRANSFER_GPU_TO_CPU = SELF%TOTAL_TIME_TRANSFER_GPU_TO_CPU + FINISH - START + END SUBROUTINE - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPIM - END IF - END SUBROUTINE FIELD_INT2D_EXTRACT_VIEW - SUBROUTINE FIELD_LOG2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_LOG2D), TARGET :: SELF - LOGICAL, POINTER, INTENT(INOUT) :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX +! +! HELPERS +! - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + INTEGER (KIND=JPIM) FUNCTION FIELD_2D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + REAL(KIND=JPRB), POINTER :: PTR (:,:) + INTEGER*8 :: ISTRIDE (2) + INTEGER (KIND=JPIM) :: J - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF + ISTRIDE (1) = KIND (PTR) + DO J = 2, 2 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = .FALSE. - END IF - END SUBROUTINE FIELD_LOG2D_EXTRACT_VIEW + JDIM = 0 + IF (LOC (PTR (2, 1)) - LOC (PTR (1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - FUNCTION FIELD_2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + JDIM = 1 - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + IF (LOC (PTR (1, 2)) - LOC (PTR (1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF + JDIM = 2 - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPRB - END IF - END FUNCTION FIELD_2D_GET_VIEW + END FUNCTION FIELD_2D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_3D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + REAL(KIND=JPRB), POINTER :: PTR (:,:,:) + INTEGER*8 :: ISTRIDE (3) + INTEGER (KIND=JPIM) :: J - FUNCTION FIELD_3D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + ISTRIDE (1) = KIND (PTR) + DO J = 2, 3 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + JDIM = 0 + IF (LOC (PTR (2, 1, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF + JDIM = 1 - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_3D_GET_VIEW + IF (LOC (PTR (1, 2, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - FUNCTION FIELD_4D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + JDIM = 2 - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + IF (LOC (PTR (1, 1, 2)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF + JDIM = 3 - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_4D_GET_VIEW + END FUNCTION FIELD_3D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_4D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + REAL(KIND=JPRB), POINTER :: PTR (:,:,:,:) + INTEGER*8 :: ISTRIDE (4) + INTEGER (KIND=JPIM) :: J - FUNCTION FIELD_INT2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + ISTRIDE (1) = KIND (PTR) + DO J = 2, 4 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + JDIM = 0 + IF (LOC (PTR (2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF + JDIM = 1 - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPIM - END IF - END FUNCTION FIELD_INT2D_GET_VIEW + IF (LOC (PTR (1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF + JDIM = 2 - FUNCTION FIELD_LOG2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_LOG2D), TARGET :: SELF - LOGICAL, POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX + IF (LOC (PTR (1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF + JDIM = 3 - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF + IF (LOC (PTR (1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (4)) THEN + RETURN + ENDIF - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = .FALSE. - END IF - END FUNCTION FIELD_LOG2D_GET_VIEW + JDIM = 4 + END FUNCTION FIELD_4D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_5D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + REAL(KIND=JPRB), POINTER :: PTR (:,:,:,:,:) + INTEGER*8 :: ISTRIDE (5) + INTEGER (KIND=JPIM) :: J - SUBROUTINE FIELD_2D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_2D), TARGET :: SELF + ISTRIDE (1) = KIND (PTR) + DO J = 2, 5 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_2D_CREATE_DEVICE + JDIM = 0 + IF (LOC (PTR (2, 1, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - SUBROUTINE FIELD_3D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE + JDIM = 1 - ARRSIZE = SIZE(SELF%PTR) * SIZEOF(1.0_JPRB) - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - CALL ACC_MAP_DATA(SELF%PTR, SELF%DEVDATA, ARRSIZE) + IF (LOC (PTR (1, 2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_3D_CREATE_DEVICE + JDIM = 2 - SUBROUTINE FIELD_4D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_4D), TARGET :: SELF + IF (LOC (PTR (1, 1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_4D_CREATE_DEVICE + JDIM = 3 - SUBROUTINE FIELD_INT2D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_INT2D), TARGET :: SELF + IF (LOC (PTR (1, 1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (4)) THEN + RETURN + ENDIF - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_INT2D_CREATE_DEVICE + JDIM = 4 - SUBROUTINE FIELD_LOG2D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_LOG2D), TARGET :: SELF + IF (LOC (PTR (1, 1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (5)) THEN + RETURN + ENDIF - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_LOG2D_CREATE_DEVICE + JDIM = 5 - FUNCTION FIELD_2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) + END FUNCTION FIELD_5D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_INT2D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + INTEGER(KIND=JPIM), POINTER :: PTR (:,:) + INTEGER*8 :: ISTRIDE (2) + INTEGER (KIND=JPIM) :: J - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF + ISTRIDE (1) = KIND (PTR) + DO J = 2, 2 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_2D_GET_DEVICE_DATA + JDIM = 0 + IF (LOC (PTR (2, 1)) - LOC (PTR (1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - FUNCTION FIELD_3D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) + JDIM = 1 - type(c_ptr) :: hptr + IF (LOC (PTR (1, 2)) - LOC (PTR (1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF + JDIM = 2 - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - hptr = acc_hostptr(self%devdata) - call c_f_pointer(hptr, devptr, shape(self%devdata)) - END IF - END FUNCTION FIELD_3D_GET_DEVICE_DATA + END FUNCTION FIELD_INT2D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_INT3D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + INTEGER(KIND=JPIM), POINTER :: PTR (:,:,:) + INTEGER*8 :: ISTRIDE (3) + INTEGER (KIND=JPIM) :: J - FUNCTION FIELD_4D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) + ISTRIDE (1) = KIND (PTR) + DO J = 2, 3 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF + JDIM = 0 + IF (LOC (PTR (2, 1, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_4D_GET_DEVICE_DATA + JDIM = 1 - FUNCTION FIELD_INT2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) + IF (LOC (PTR (1, 2, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF + JDIM = 2 - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_INT2D_GET_DEVICE_DATA + IF (LOC (PTR (1, 1, 2)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - FUNCTION FIELD_LOG2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_LOG2D), TARGET :: SELF - LOGICAL, POINTER, CONTIGUOUS :: DEVPTR(:,:) + JDIM = 3 - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF + END FUNCTION FIELD_INT3D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_INT4D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + INTEGER(KIND=JPIM), POINTER :: PTR (:,:,:,:) + INTEGER*8 :: ISTRIDE (4) + INTEGER (KIND=JPIM) :: J - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_LOG2D_GET_DEVICE_DATA + ISTRIDE (1) = KIND (PTR) + DO J = 2, 4 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SUBROUTINE FIELD_2D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_2D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_2D_UPDATE_DEVICE + JDIM = 0 + IF (LOC (PTR (2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - SUBROUTINE FIELD_3D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL + JDIM = 1 - INTEGER(KIND=JPIM) :: istat, arrsize, blksize - type(c_ptr) :: hptr - integer(kind=jpim) :: shape(3) + IF (LOC (PTR (1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - logical :: pres + JDIM = 2 - arrsize = size(self%ptr) * sizeof(1.0_JPRB) - blksize = arrsize / self%nblocks - ALLOCATE(SELF%DEVDATA, mold=SELF%PTR) + IF (LOC (PTR (1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - IF (SELF%OWNED) THEN - call acc_map_data(self%data, self%devdata, arrsize) - call acc_memcpy_to_device(self%devdata(:,:,:), self%data(:,:,:), arrsize) + JDIM = 3 - ELSE - ! TODO: This is a dirty trick to fool the OpenACC runtime! - ! We allocate the associated data array (full size), so that we can - ! add it to the OpenACC host-device map (it's contiguous!) - ! Then, we copy the data in a strided fashio from the discontiguous pointer. - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - call acc_map_data(self%data, self%devdata, arrsize) - DO IBL=1, SELF%NBLOCKS - call acc_memcpy_to_device(self%devdata(:,:,ibl), self%base_ptr(:,:,self%fidx,ibl), blksize) - END DO - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_3D_UPDATE_DEVICE + IF (LOC (PTR (1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (4)) THEN + RETURN + ENDIF - SUBROUTINE FIELD_4D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_4D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:,:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,:,:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_4D_UPDATE_DEVICE + JDIM = 4 - SUBROUTINE FIELD_INT2D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE + END FUNCTION FIELD_INT4D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_INT5D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + INTEGER(KIND=JPIM), POINTER :: PTR (:,:,:,:,:) + INTEGER*8 :: ISTRIDE (5) + INTEGER (KIND=JPIM) :: J - SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_LOG2D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE + ISTRIDE (1) = KIND (PTR) + DO J = 2, 5 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SUBROUTINE FIELD_2D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_2D) :: SELF - INTEGER(KIND=JPIM) :: IBL + JDIM = 0 + IF (LOC (PTR (2, 1, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (SELF%OWNED) THEN - !$acc update host(SELF%DATA(:,:)) - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:) = SELF%DEVPTR(:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_2D_UPDATE_HOST + JDIM = 1 - SUBROUTINE FIELD_3D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_3D) :: SELF - INTEGER(KIND=JPIM) :: IBL + IF (LOC (PTR (1, 2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - INTEGER(KIND=JPIM) :: istat, arrsize, blksize - type(c_ptr) :: hptr + JDIM = 2 - arrsize = size(self%ptr) * sizeof(1.0_JPRB) - blksize = arrsize / self%nblocks + IF (LOC (PTR (1, 1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - IF (SELF%OWNED) THEN - call acc_memcpy_from_device(self%data(:,:,:), self%devdata(:,:,:), arrsize) - call acc_unmap_data(self%data) + JDIM = 3 - ELSE - ! call acc_memcpy_from_device(self%data(:,:,:), self%devdata(:,:,:), arrsize) - DO IBL=1, SELF%NBLOCKS - ! self%base_ptr(:,:,self%fidx,ibl) = self%data(:,:,ibl) - - ! call acc_memcpy_from_device(self%ptr(:,:,ibl), self%devdata(:,:,ibl), blksize) - call acc_memcpy_from_device(self%base_ptr(:,:,self%fidx,ibl), self%devdata(:,:,ibl), blksize) - END DO - call acc_unmap_data(self%data) - DEALLOCATE(SELF%DATA) - END IF + IF (LOC (PTR (1, 1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (4)) THEN + RETURN + ENDIF - DEALLOCATE(SELF%DEVDATA) - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_3D_UPDATE_HOST + JDIM = 4 - SUBROUTINE FIELD_4D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_4D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DATA(:,:,:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,:,:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:,:,:) = SELF%DEVPTR(:,:,:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_4D_UPDATE_HOST + IF (LOC (PTR (1, 1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (5)) THEN + RETURN + ENDIF - SUBROUTINE FIELD_INT2D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_INT2D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DATA(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:) = SELF%DEVPTR(:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_INT2D_UPDATE_HOST + JDIM = 5 - SUBROUTINE FIELD_LOG2D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_LOG2D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DATA(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:) = SELF%DEVPTR(:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_LOG2D_UPDATE_HOST - - SUBROUTINE FIELD_2D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_2D), TARGET :: SELF + END FUNCTION FIELD_INT5D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_LOG2D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + LOGICAL(KIND=JPLM), POINTER :: PTR (:,:) + INTEGER*8 :: ISTRIDE (2) + INTEGER (KIND=JPIM) :: J - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_2D_DELETE_DEVICE + ISTRIDE (1) = KIND (PTR) + DO J = 2, 2 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SUBROUTINE FIELD_3D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_3D), TARGET :: SELF + JDIM = 0 + IF (LOC (PTR (2, 1)) - LOC (PTR (1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (SELF%OWNED) THEN - CALL ACC_UNMAP_DATA(SELF%DATA) - ELSE - CALL ACC_UNMAP_DATA(SELF%PTR) - END IF - DEALLOCATE(SELF%DEVDATA) - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_3D_DELETE_DEVICE + JDIM = 1 - SUBROUTINE FIELD_4D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_4D), TARGET :: SELF + IF (LOC (PTR (1, 2)) - LOC (PTR (1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_4D_DELETE_DEVICE + JDIM = 2 - SUBROUTINE FIELD_INT2D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_INT2D), TARGET :: SELF + END FUNCTION FIELD_LOG2D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_LOG3D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + LOGICAL(KIND=JPLM), POINTER :: PTR (:,:,:) + INTEGER*8 :: ISTRIDE (3) + INTEGER (KIND=JPIM) :: J - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_INT2D_DELETE_DEVICE + ISTRIDE (1) = KIND (PTR) + DO J = 2, 3 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SUBROUTINE FIELD_LOG2D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_LOG2D), TARGET :: SELF + JDIM = 0 + IF (LOC (PTR (2, 1, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_LOG2D_DELETE_DEVICE + JDIM = 1 - SUBROUTINE FIELD_2D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_2D), TARGET :: SELF + IF (LOC (PTR (1, 2, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_2D_ENSURE_HOST + JDIM = 2 - SUBROUTINE FIELD_3D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_3D), TARGET :: SELF + IF (LOC (PTR (1, 1, 2)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_3D_ENSURE_HOST + JDIM = 3 - SUBROUTINE FIELD_4D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_4D), TARGET :: SELF + END FUNCTION FIELD_LOG3D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_LOG4D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + LOGICAL(KIND=JPLM), POINTER :: PTR (:,:,:,:) + INTEGER*8 :: ISTRIDE (4) + INTEGER (KIND=JPIM) :: J - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_4D_ENSURE_HOST + ISTRIDE (1) = KIND (PTR) + DO J = 2, 4 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SUBROUTINE FIELD_INT2D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_INT2D), TARGET :: SELF + JDIM = 0 + IF (LOC (PTR (2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_INT2D_ENSURE_HOST + JDIM = 1 - SUBROUTINE FIELD_LOG2D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_LOG2D), TARGET :: SELF + IF (LOC (PTR (1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_LOG2D_ENSURE_HOST + JDIM = 2 - SUBROUTINE FIELD_2D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_2D), TARGET :: SELF + IF (LOC (PTR (1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_2D_ENSURE_DEVICE + JDIM = 3 - SUBROUTINE FIELD_3D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_3D), TARGET :: SELF + IF (LOC (PTR (1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (4)) THEN + RETURN + ENDIF - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_3D_ENSURE_DEVICE + JDIM = 4 - SUBROUTINE FIELD_4D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_4D), TARGET :: SELF + END FUNCTION FIELD_LOG4D_GET_LAST_CONTIGUOUS_DIMENSION + INTEGER (KIND=JPIM) FUNCTION FIELD_LOG5D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + LOGICAL(KIND=JPLM), POINTER :: PTR (:,:,:,:,:) + INTEGER*8 :: ISTRIDE (5) + INTEGER (KIND=JPIM) :: J - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_4D_ENSURE_DEVICE + ISTRIDE (1) = KIND (PTR) + DO J = 2, 5 + ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) + ENDDO - SUBROUTINE FIELD_INT2D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_INT2D), TARGET :: SELF + JDIM = 0 + IF (LOC (PTR (2, 1, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (1)) THEN + RETURN + ENDIF - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_INT2D_ENSURE_DEVICE + JDIM = 1 - SUBROUTINE FIELD_LOG2D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_LOG2D), TARGET :: SELF + IF (LOC (PTR (1, 2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (2)) THEN + RETURN + ENDIF - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_LOG2D_ENSURE_DEVICE + JDIM = 2 - SUBROUTINE FIELD_2D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_2D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_2D_FINAL + IF (LOC (PTR (1, 1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (3)) THEN + RETURN + ENDIF - SUBROUTINE FIELD_3D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_3D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_3D_FINAL + JDIM = 3 - SUBROUTINE FIELD_4D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_4D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_4D_FINAL + IF (LOC (PTR (1, 1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (4)) THEN + RETURN + ENDIF - SUBROUTINE FIELD_INT2D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_INT2D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_INT2D_FINAL + JDIM = 4 - SUBROUTINE FIELD_LOG2D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_LOG2D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_LOG2D_FINAL + IF (LOC (PTR (1, 1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (5)) THEN + RETURN + ENDIF + + JDIM = 5 + + END FUNCTION FIELD_LOG5D_GET_LAST_CONTIGUOUS_DIMENSION END MODULE FIELD_MODULE diff --git a/src/common/module/field_module_orig.F90 b/src/common/module/field_module_orig.F90 new file mode 100644 index 00000000..e2268014 --- /dev/null +++ b/src/common/module/field_module_orig.F90 @@ -0,0 +1,2189 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE FIELD_MODULE + ! The FIELD types provided by this module provide data abstractions that + ! decouple data storage in memory from the data views used in thread-parallel + ! sections of the code. They are intended to thinly wrap ATLAS_FIELD + ! objects and provide additional features that may later be + ! incorporated into Atlas. They can also provide backward-compatibility + ! for non-Atlas execution modes. + +USE PARKIND1, ONLY: JPIM, JPRB +USE OML_MOD, ONLY: OML_MAX_THREADS, OML_MY_THREAD +USE IEEE_ARITHMETIC, ONLY: IEEE_SIGNALING_NAN + +USE CUDAFOR + +use openacc + +use iso_c_binding + +IMPLICIT NONE + +TYPE FIELD_2D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! The data view to be used in thread-parallel sections + ! + ! The underlying view pointer is of rank-1, since we always + ! the horizontal component as a single dimension. + REAL(KIND=JPRB), POINTER :: VIEW(:) => NULL() + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + REAL(KIND=JPRB), POINTER :: PTR(:,:) => NULL() + ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:) + ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:) + + ! For wrapping discontiguous fields in co-allocated storage + ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer + ! and integer index, to allow block pointer extraction that + ! conforms with CUDA device pointers in PGI. + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + + ! Number of blocks used in the data layout + INTEGER :: NBLOCKS + + ! Flag indicating whether this field stores real data + LOGICAL :: ACTIVE = .FALSE. + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + ! Flag indicating whether we own the allocated base array + LOGICAL :: OWNED = .TRUE. + ! Flag indicating whether latest data currently resides on device + LOGICAL :: ON_DEVICE = .FALSE. + +CONTAINS + + PROCEDURE :: CLONE => FIELD_2D_CLONE + PROCEDURE :: UPDATE_VIEW => FIELD_2D_UPDATE_VIEW + PROCEDURE :: EXTRACT_VIEW => FIELD_2D_EXTRACT_VIEW + PROCEDURE :: GET_VIEW => FIELD_2D_GET_VIEW + PROCEDURE :: FINAL => FIELD_2D_FINAL + + ! GPU-specific device data transfer API + PROCEDURE :: CREATE_DEVICE => FIELD_2D_CREATE_DEVICE + PROCEDURE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE + PROCEDURE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST + PROCEDURE :: ENSURE_DEVICE => FIELD_2D_ENSURE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_2D_ENSURE_HOST + PROCEDURE :: DELETE_DEVICE => FIELD_2D_DELETE_DEVICE +END TYPE FIELD_2D + +TYPE FIELD_3D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! The data view to be used in thread-parallel sections + ! + ! The underlying view pointer is of rank-1, since we always + ! the horizontal component as a single dimension. + REAL(KIND=JPRB), POINTER :: VIEW(:,:) => NULL() + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + REAL(KIND=JPRB), POINTER :: PTR(:,:,:) => NULL() + ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:) + ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:,:) + + ! For wrapping discontiguous fields in co-allocated storage + ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer + ! and integer index, to allow block pointer extraction that + ! conforms with CUDA device pointers in PGI. + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX + + ! ! A separate data pointer that can be used to create + ! ! a contiguous chunk of host memory to cleanly map to + ! ! device, should the %DATA pointer be discontiguous. + ! REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() + + REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) + + ! Number of blocks used in the data layout + INTEGER :: NBLOCKS + + ! Flag indicating whether this field stores real data + LOGICAL :: ACTIVE = .FALSE. + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + ! Flag indicating whether we own the allocated base array + LOGICAL :: OWNED = .TRUE. + ! Flag indicating whether latest data currently resides on device + LOGICAL :: ON_DEVICE = .FALSE. + +CONTAINS + + PROCEDURE :: CLONE => FIELD_3D_CLONE + PROCEDURE :: UPDATE_VIEW => FIELD_3D_UPDATE_VIEW + PROCEDURE :: EXTRACT_VIEW => FIELD_3D_EXTRACT_VIEW + PROCEDURE :: GET_VIEW => FIELD_3D_GET_VIEW + PROCEDURE :: FINAL => FIELD_3D_FINAL + + ! GPU-specific device data transfer API + PROCEDURE :: CREATE_DEVICE => FIELD_3D_CREATE_DEVICE + PROCEDURE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE + PROCEDURE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST + PROCEDURE :: ENSURE_DEVICE => FIELD_3D_ENSURE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_3D_ENSURE_HOST + PROCEDURE :: DELETE_DEVICE => FIELD_3D_DELETE_DEVICE +END TYPE FIELD_3D + +TYPE FIELD_4D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! The data view to be used in thread-parallel sections + ! + ! The underlying view pointer is of rank-1, since we always + ! the horizontal component as a single dimension. + REAL(KIND=JPRB), POINTER :: VIEW(:,:,:) => NULL() + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) => NULL() + ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:,:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:) + ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:,:,:) + + ! For wrapping discontiguous fields in co-allocated storage + ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer + ! and integer index, to allow block pointer extraction that + ! conforms with CUDA device pointers in PGI. + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() + + ! Number of blocks used in the data layout + INTEGER :: NBLOCKS + + ! Flag indicating whether this field stores real data + LOGICAL :: ACTIVE = .FALSE. + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + ! Flag indicating whether we own the allocated base array + LOGICAL :: OWNED = .TRUE. + ! Flag indicating whether latest data currently resides on device + LOGICAL :: ON_DEVICE = .FALSE. + +CONTAINS + + PROCEDURE :: CLONE => FIELD_4D_CLONE + PROCEDURE :: UPDATE_VIEW => FIELD_4D_UPDATE_VIEW + PROCEDURE :: EXTRACT_VIEW => FIELD_4D_EXTRACT_VIEW + PROCEDURE :: GET_VIEW => FIELD_4D_GET_VIEW + PROCEDURE :: FINAL => FIELD_4D_FINAL + + ! GPU-specific device data transfer API + PROCEDURE :: CREATE_DEVICE => FIELD_4D_CREATE_DEVICE + PROCEDURE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE + PROCEDURE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST + PROCEDURE :: ENSURE_DEVICE => FIELD_4D_ENSURE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_4D_ENSURE_HOST + PROCEDURE :: DELETE_DEVICE => FIELD_4D_DELETE_DEVICE +END TYPE FIELD_4D + + +TYPE FIELD_INT2D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! The data view to be used in thread-parallel sections + ! + ! The underlying view pointer is of rank-1, since we always + ! the horizontal component as a single dimension. + INTEGER(KIND=JPIM), POINTER :: VIEW(:) => NULL() + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + INTEGER(KIND=JPIM), POINTER :: PTR(:,:) => NULL() + ! INTEGER(KIND=JPIM), ALLOCATABLE :: DATA(:,:) + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:) + ! INTEGER(KIND=JPIM), ALLOCATABLE, PINNED :: DATA(:,:) + + ! For wrapping discontiguous fields in co-allocated storage + ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer + ! and integer index, to allow block pointer extraction that + ! conforms with CUDA device pointers in PGI. + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + + ! Number of blocks used in the data layout + INTEGER :: NBLOCKS + + ! Flag indicating whether this field stores real data + LOGICAL :: ACTIVE = .FALSE. + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + ! Flag indicating whether we own the allocated base array + LOGICAL :: OWNED = .TRUE. + ! Flag indicating whether latest data currently resides on device + LOGICAL :: ON_DEVICE = .FALSE. + +CONTAINS + + PROCEDURE :: CLONE => FIELD_INT2D_CLONE + PROCEDURE :: UPDATE_VIEW => FIELD_INT2D_UPDATE_VIEW + PROCEDURE :: EXTRACT_VIEW => FIELD_INT2D_EXTRACT_VIEW + PROCEDURE :: GET_VIEW => FIELD_INT2D_GET_VIEW + PROCEDURE :: FINAL => FIELD_INT2D_FINAL + + ! GPU-specific device data transfer API + PROCEDURE :: CREATE_DEVICE => FIELD_INT2D_CREATE_DEVICE + PROCEDURE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE + PROCEDURE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST + PROCEDURE :: ENSURE_DEVICE => FIELD_INT2D_ENSURE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_INT2D_ENSURE_HOST + PROCEDURE :: DELETE_DEVICE => FIELD_INT2D_DELETE_DEVICE +END TYPE FIELD_INT2D + + +TYPE FIELD_LOG2D + ! A FIELD encapsulates a single multi-dimensional array and can + ! provide block-indexed "views" of the data for automating the + ! allocation and parallel iterration of NPROMA blocks. + + ! The data view to be used in thread-parallel sections + ! + ! The underlying view pointer is of rank-1, since we always + ! the horizontal component as a single dimension. + LOGICAL, POINTER :: VIEW(:) => NULL() + + ! TODO: Atlas-based field data storage field + ! TODO: Do we still need to use pointers here? + ! TYPE(ATLAS_FIELD), POINTER :: DATA + + ! Storage pointer for non-Atlas backward-compatibility mode + ! + ! The underlying storage pointer has the rank as the dimension, + ! where the innermost dimension represents the horizontal and + ! the outermost one is the block index. + LOGICAL, POINTER :: PTR(:,:) => NULL() + ! LOGICAL, ALLOCATABLE :: DATA(:,:) + LOGICAL, POINTER, CONTIGUOUS :: DATA(:,:) + ! LOGICAL, ALLOCATABLE, PINNED :: DATA(:,:) + + ! For wrapping discontiguous fields in co-allocated storage + ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer + ! and integer index, to allow block pointer extraction that + ! conforms with CUDA device pointers in PGI. + LOGICAL, POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() + INTEGER(KIND=JPIM) :: FIDX + + ! A separate data pointer that can be used to create + ! a contiguous chunk of host memory to cleanly map to + ! device, should the %DATA pointer be discontiguous. + LOGICAL, POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() + + ! Number of blocks used in the data layout + INTEGER :: NBLOCKS + + ! Flag indicating whether this field stores real data + LOGICAL :: ACTIVE = .FALSE. + ! Flag indicating the use a single block-buffer per thread + LOGICAL :: THREAD_BUFFER = .FALSE. + ! Flag indicating whether we own the allocated base array + LOGICAL :: OWNED = .TRUE. + ! Flag indicating whether latest data currently resides on device + LOGICAL :: ON_DEVICE = .FALSE. + +CONTAINS + + PROCEDURE :: CLONE => FIELD_LOG2D_CLONE + PROCEDURE :: UPDATE_VIEW => FIELD_LOG2D_UPDATE_VIEW + PROCEDURE :: EXTRACT_VIEW => FIELD_LOG2D_EXTRACT_VIEW + PROCEDURE :: GET_VIEW => FIELD_LOG2D_GET_VIEW + PROCEDURE :: FINAL => FIELD_LOG2D_FINAL + + ! GPU-specific device data transfer API + PROCEDURE :: CREATE_DEVICE => FIELD_LOG2D_CREATE_DEVICE + PROCEDURE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE + PROCEDURE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST + PROCEDURE :: ENSURE_DEVICE => FIELD_LOG2D_ENSURE_DEVICE + PROCEDURE :: ENSURE_HOST => FIELD_LOG2D_ENSURE_HOST + PROCEDURE :: DELETE_DEVICE => FIELD_LOG2D_DELETE_DEVICE +END TYPE FIELD_LOG2D + + +TYPE FIELD_2D_PTR + ! Struct to hold references to field objects + TYPE(FIELD_2D), POINTER :: PTR => NULL() +END TYPE FIELD_2D_PTR + +TYPE FIELD_2D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:) => NULL() +END TYPE FIELD_2D_VIEW +TYPE FIELD_3D_PTR + ! Struct to hold references to field objects + TYPE(FIELD_3D), POINTER :: PTR => NULL() +END TYPE FIELD_3D_PTR + +TYPE FIELD_3D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:,:) => NULL() +END TYPE FIELD_3D_VIEW +TYPE FIELD_4D_PTR + ! Struct to hold references to field objects + TYPE(FIELD_4D), POINTER :: PTR => NULL() +END TYPE FIELD_4D_PTR + +TYPE FIELD_4D_VIEW + ! Struct to hold array views, so we can make arrays of them + REAL(KIND=JPRB), POINTER :: P(:,:,:) => NULL() +END TYPE FIELD_4D_VIEW + + +INTERFACE FIELD_2D + MODULE PROCEDURE :: FIELD_2D_WRAP + MODULE PROCEDURE :: FIELD_2D_WRAP_PACKED + ! MODULE PROCEDURE :: FIELD_2D_EMPTY + MODULE PROCEDURE :: FIELD_2D_ALLOCATE +END INTERFACE + +INTERFACE FIELD_3D + MODULE PROCEDURE :: FIELD_3D_WRAP + MODULE PROCEDURE :: FIELD_3D_WRAP_PACKED + ! MODULE PROCEDURE :: FIELD_3D_EMPTY + MODULE PROCEDURE :: FIELD_3D_ALLOCATE +END INTERFACE + +INTERFACE FIELD_4D + MODULE PROCEDURE :: FIELD_4D_WRAP + MODULE PROCEDURE :: FIELD_4D_WRAP_PACKED + ! MODULE PROCEDURE :: FIELD_4D_EMPTY + MODULE PROCEDURE :: FIELD_4D_ALLOCATE +END INTERFACE + + +INTERFACE FIELD_INT2D + MODULE PROCEDURE :: FIELD_INT2D_WRAP + MODULE PROCEDURE :: FIELD_INT2D_WRAP_PACKED + ! MODULE PROCEDURE :: FIELD_INT2D_EMPTY + MODULE PROCEDURE :: FIELD_INT2D_ALLOCATE +END INTERFACE + + +INTERFACE FIELD_LOG2D + MODULE PROCEDURE :: FIELD_LOG2D_WRAP + MODULE PROCEDURE :: FIELD_LOG2D_WRAP_PACKED + ! MODULE PROCEDURE :: FIELD_LOG2D_EMPTY + MODULE PROCEDURE :: FIELD_LOG2D_ALLOCATE +END INTERFACE + + +INTERFACE FILL_BUFFER + MODULE PROCEDURE :: FILL_BUFFER_2D, FILL_BUFFER_3D, FILL_BUFFER_4D + MODULE PROCEDURE :: FILL_BUFFER_INT2D, FILL_BUFFER_LOG2D +END INTERFACE FILL_BUFFER + +INTERFACE FIELD_CREATE_DEVICE + MODULE PROCEDURE :: FIELD_2D_CREATE_DEVICE + MODULE PROCEDURE :: FIELD_3D_CREATE_DEVICE + MODULE PROCEDURE :: FIELD_4D_CREATE_DEVICE + MODULE PROCEDURE :: FIELD_INT2D_CREATE_DEVICE + MODULE PROCEDURE :: FIELD_LOG2D_CREATE_DEVICE +END INTERFACE FIELD_CREATE_DEVICE + +INTERFACE FIELD_UPDATE_DEVICE + MODULE PROCEDURE :: FIELD_2D_UPDATE_DEVICE + MODULE PROCEDURE :: FIELD_3D_UPDATE_DEVICE + MODULE PROCEDURE :: FIELD_4D_UPDATE_DEVICE + MODULE PROCEDURE :: FIELD_INT2D_UPDATE_DEVICE + MODULE PROCEDURE :: FIELD_LOG2D_UPDATE_DEVICE +END INTERFACE FIELD_UPDATE_DEVICE + +INTERFACE FIELD_UPDATE_HOST + MODULE PROCEDURE :: FIELD_2D_UPDATE_HOST + MODULE PROCEDURE :: FIELD_3D_UPDATE_HOST + MODULE PROCEDURE :: FIELD_4D_UPDATE_HOST + MODULE PROCEDURE :: FIELD_INT2D_UPDATE_HOST + MODULE PROCEDURE :: FIELD_LOG2D_UPDATE_HOST +END INTERFACE FIELD_UPDATE_HOST + +INTERFACE FIELD_DELETE_DEVICE + MODULE PROCEDURE :: FIELD_2D_DELETE_DEVICE + MODULE PROCEDURE :: FIELD_3D_DELETE_DEVICE + MODULE PROCEDURE :: FIELD_4D_DELETE_DEVICE + MODULE PROCEDURE :: FIELD_INT2D_DELETE_DEVICE + MODULE PROCEDURE :: FIELD_LOG2D_DELETE_DEVICE +END INTERFACE FIELD_DELETE_DEVICE + +INTERFACE GET_DEVICE_DATA + MODULE PROCEDURE :: FIELD_2D_GET_DEVICE_DATA + MODULE PROCEDURE :: FIELD_3D_GET_DEVICE_DATA + MODULE PROCEDURE :: FIELD_4D_GET_DEVICE_DATA + MODULE PROCEDURE :: FIELD_INT2D_GET_DEVICE_DATA + MODULE PROCEDURE :: FIELD_LOG2D_GET_DEVICE_DATA +END INTERFACE GET_DEVICE_DATA + +INTERFACE FIELD_ENSURE_DEVICE + MODULE PROCEDURE :: FIELD_2D_ENSURE_DEVICE + MODULE PROCEDURE :: FIELD_3D_ENSURE_DEVICE + MODULE PROCEDURE :: FIELD_4D_ENSURE_DEVICE + MODULE PROCEDURE :: FIELD_INT2D_ENSURE_DEVICE + MODULE PROCEDURE :: FIELD_LOG2D_ENSURE_DEVICE +END INTERFACE FIELD_ENSURE_DEVICE + +INTERFACE FIELD_ENSURE_HOST + MODULE PROCEDURE :: FIELD_2D_ENSURE_HOST + MODULE PROCEDURE :: FIELD_3D_ENSURE_HOST + MODULE PROCEDURE :: FIELD_4D_ENSURE_HOST + MODULE PROCEDURE :: FIELD_INT2D_ENSURE_HOST + MODULE PROCEDURE :: FIELD_LOG2D_ENSURE_HOST +END INTERFACE FIELD_ENSURE_HOST + +CONTAINS + + function malloc_host_pinned_2d(shape, nblocks) result(ptr) + integer(kind=jpim), intent(in) :: shape(1) + integer(kind=jpim), intent(in), optional :: nblocks + real(kind=jprb), pointer, contiguous :: ptr(:,:) + + integer(kind=jpim) :: istat, arrsize + type(c_ptr) :: hptr + + arrsize = shape(1) * nblocks * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" + call c_f_pointer(hptr, ptr, [shape(1), nblocks] ) + end function malloc_host_pinned_2d + + function malloc_host_pinned_3d(shape, nblocks) result(ptr) + integer(kind=jpim), intent(in) :: shape(2) + integer(kind=jpim), intent(in), optional :: nblocks + real(kind=jprb), pointer, contiguous :: ptr(:,:,:) + + integer(kind=jpim) :: istat, arrsize + type(c_ptr) :: hptr + + arrsize = shape(1) * shape(2) * nblocks * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" + call c_f_pointer(hptr, ptr, [shape(1), shape(2), nblocks] ) + end function malloc_host_pinned_3d + + function malloc_host_pinned_4d(shape, nblocks) result(ptr) + integer(kind=jpim), intent(in) :: shape(3) + integer(kind=jpim), intent(in), optional :: nblocks + real(kind=jprb), pointer, contiguous :: ptr(:,:,:,:) + + integer(kind=jpim) :: istat, arrsize + type(c_ptr) :: hptr + + arrsize = shape(1) * shape(2) * shape(3) * nblocks * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" + call c_f_pointer(hptr, ptr, [shape(1), shape(2), shape(3), nblocks] ) + end function malloc_host_pinned_4d + + + SUBROUTINE FILL_BUFFER_2D(BUFFER, INDEX) + ! Utility routine to fill data buffers (views) + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:) + INTEGER(KIND=JPIM), INTENT(IN) :: INDEX + INTEGER(KIND=JPIM) :: IDX + + IDX = INDEX+1 + BUFFER(IDX:) = BUFFER(INDEX) + END SUBROUTINE FILL_BUFFER_2D + + SUBROUTINE FILL_BUFFER_3D(BUFFER, INDEX) + ! Utility routine to fill data buffers (views) + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: INDEX + INTEGER(KIND=JPIM) :: I, IDX + + IDX = INDEX+1 + DO I=1, SIZE(BUFFER, 2) + BUFFER(IDX:,I) = BUFFER(INDEX,I) + END DO + END SUBROUTINE FILL_BUFFER_3D + + SUBROUTINE FILL_BUFFER_4D(BUFFER, INDEX) + ! Utility routine to fill data buffers (views) + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: INDEX + INTEGER(KIND=JPIM) :: I, J, IDX + + IDX = INDEX+1 + DO I=1, SIZE(BUFFER, 2) + DO J=1, SIZE(BUFFER, 3) + BUFFER(IDX:,I,J) = BUFFER(INDEX,I,J) + END DO + END DO + END SUBROUTINE FILL_BUFFER_4D + + SUBROUTINE FILL_BUFFER_INT2D(BUFFER, INDEX) + ! Utility routine to fill data buffers (views) + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: BUFFER(:) + INTEGER(KIND=JPIM), INTENT(IN) :: INDEX + INTEGER(KIND=JPIM) :: IDX + + IDX = INDEX+1 + BUFFER(IDX:) = BUFFER(INDEX) + END SUBROUTINE FILL_BUFFER_INT2D + + SUBROUTINE FILL_BUFFER_LOG2D(BUFFER, INDEX) + ! Utility routine to fill data buffers (views) + LOGICAL, POINTER, INTENT(INOUT) :: BUFFER(:) + INTEGER(KIND=JPIM), INTENT(IN) :: INDEX + INTEGER(KIND=JPIM) :: IDX + + IDX = INDEX+1 + BUFFER(IDX:) = BUFFER(INDEX) + END SUBROUTINE FILL_BUFFER_LOG2D + + FUNCTION FIELD_2D_EMPTY(SHAPE) RESULT(SELF) + ! Create FIELD object by wrapping existing data + ! + ! If a SHAPE is provided, a single empty buffer block-sized buffer + ! will be allocated under %VIEW and used by all threads in a + ! thread-parallel region to avoid segfault when dereferencing NULL + ! pointers. Otherwise %DATA and %VIEW will always be unassociated. + TYPE(FIELD_2D) :: SELF + INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) + + SELF%PTR => NULL() + IF (PRESENT(SHAPE)) THEN + ALLOCATE(SELF%VIEW(SHAPE(1))) + END IF + SELF%ACTIVE = .FALSE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = 0 + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_2D_EMPTY + + FUNCTION FIELD_3D_EMPTY(SHAPE) RESULT(SELF) + ! Create FIELD object by wrapping existing data + ! + ! If a SHAPE is provided, a single empty buffer block-sized buffer + ! will be allocated under %VIEW and used by all threads in a + ! thread-parallel region to avoid segfault when dereferencing NULL + ! pointers. Otherwise %DATA and %VIEW will always be unassociated. + TYPE(FIELD_3D) :: SELF + INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(2) + + SELF%PTR => NULL() + IF (PRESENT(SHAPE)) THEN + ALLOCATE(SELF%VIEW(SHAPE(1),SHAPE(2))) + END IF + SELF%ACTIVE = .FALSE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = 0 + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_3D_EMPTY + + FUNCTION FIELD_4D_EMPTY(SHAPE) RESULT(SELF) + ! Create FIELD object by wrapping existing data + ! + ! If a SHAPE is provided, a single empty buffer block-sized buffer + ! will be allocated under %VIEW and used by all threads in a + ! thread-parallel region to avoid segfault when dereferencing NULL + ! pointers. Otherwise %DATA and %VIEW will always be unassociated. + TYPE(FIELD_4D) :: SELF + INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(3) + + SELF%PTR => NULL() + IF (PRESENT(SHAPE)) THEN + ALLOCATE(SELF%VIEW(SHAPE(1),SHAPE(2),SHAPE(3))) + END IF + SELF%ACTIVE = .FALSE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = 0 + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_4D_EMPTY + + FUNCTION FIELD_INT2D_EMPTY(SHAPE) RESULT(SELF) + ! Create FIELD object by wrapping existing data + ! + ! If a SHAPE is provided, a single empty buffer block-sized buffer + ! will be allocated under %VIEW and used by all threads in a + ! thread-parallel region to avoid segfault when dereferencing NULL + ! pointers. Otherwise %DATA and %VIEW will always be unassociated. + TYPE(FIELD_INT2D) :: SELF + INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) + + SELF%PTR => NULL() + IF (PRESENT(SHAPE)) THEN + ALLOCATE(SELF%VIEW(SHAPE(1))) + END IF + SELF%ACTIVE = .FALSE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = 0 + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_INT2D_EMPTY + + FUNCTION FIELD_LOG2D_EMPTY(SHAPE) RESULT(SELF) + ! Create FIELD object by wrapping existing data + ! + ! If a SHAPE is provided, a single empty buffer block-sized buffer + ! will be allocated under %VIEW and used by all threads in a + ! thread-parallel region to avoid segfault when dereferencing NULL + ! pointers. Otherwise %DATA and %VIEW will always be unassociated. + TYPE(FIELD_LOG2D) :: SELF + INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) + + SELF%PTR => NULL() + IF (PRESENT(SHAPE)) THEN + ALLOCATE(SELF%VIEW(SHAPE(1))) + END IF + SELF%ACTIVE = .FALSE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = 0 + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_LOG2D_EMPTY + + FUNCTION FIELD_2D_WRAP(DATA) RESULT(SELF) + ! Create FIELD object by wrapping existing data + TYPE(FIELD_2D), TARGET :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:) + + SELF%PTR => DATA + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_2D_WRAP + + FUNCTION FIELD_3D_WRAP(DATA) RESULT(SELF) + ! Create FIELD object by wrapping existing data + TYPE(FIELD_3D), TARGET :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) + + SELF%PTR => DATA + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 3) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_3D_WRAP + + FUNCTION FIELD_4D_WRAP(DATA) RESULT(SELF) + ! Create FIELD object by wrapping existing data + TYPE(FIELD_4D), TARGET :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) + + SELF%PTR => DATA + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 4) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_4D_WRAP + + FUNCTION FIELD_INT2D_WRAP(DATA) RESULT(SELF) + ! Create FIELD object by wrapping existing data + TYPE(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:) + + SELF%PTR => DATA + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_INT2D_WRAP + + FUNCTION FIELD_LOG2D_WRAP(DATA) RESULT(SELF) + ! Create FIELD object by wrapping existing data + TYPE(FIELD_LOG2D), TARGET :: SELF + LOGICAL, TARGET, INTENT(IN) :: DATA(:,:) + + SELF%PTR => DATA + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_LOG2D_WRAP + + FUNCTION FIELD_2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) + ! Create FIELD object packed in a multi-field buffer by storing a + ! contiguous pointer to existing data and an index. + TYPE(FIELD_2D), TARGET :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + + SELF%PTR => DATA(:,IDX,:) + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + END FUNCTION FIELD_2D_WRAP_PACKED + + FUNCTION FIELD_3D_WRAP_PACKED(DATA, IDX) RESULT(SELF) + ! Create FIELD object packed in a multi-field buffer by storing a + ! contiguous pointer to existing data and an index. + TYPE(FIELD_3D), TARGET :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + + integer(kind=jpim) :: arrsize, istat + type(c_ptr) :: hptr + + SELF%PTR => DATA(:,:,IDX,:) + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 3) + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + + ! arrsize = SIZE(SELF%PTR, 1) * SIZE(SELF%PTR, 2) * SELF%NBLOCKS * sizeof(1.0_JPRB) + ! istat = cudaSetDeviceFlags(cudadevicemaphost) + ! istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + ! call c_f_pointer(hptr, self%data, [SIZE(SELF%PTR, 1), SIZE(SELF%PTR, 2), SELF%NBLOCKS] ) + + END FUNCTION FIELD_3D_WRAP_PACKED + + FUNCTION FIELD_4D_WRAP_PACKED(DATA, IDX) RESULT(SELF) + ! Create FIELD object packed in a multi-field buffer by storing a + ! contiguous pointer to existing data and an index. + TYPE(FIELD_4D), TARGET :: SELF + REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + + SELF%PTR => DATA(:,:,:,IDX,:) + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 4) + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + END FUNCTION FIELD_4D_WRAP_PACKED + + FUNCTION FIELD_INT2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) + ! Create FIELD object packed in a multi-field buffer by storing a + ! contiguous pointer to existing data and an index. + TYPE(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + + SELF%PTR => DATA(:,IDX,:) + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + END FUNCTION FIELD_INT2D_WRAP_PACKED + + FUNCTION FIELD_LOG2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) + ! Create FIELD object packed in a multi-field buffer by storing a + ! contiguous pointer to existing data and an index. + TYPE(FIELD_LOG2D), TARGET :: SELF + LOGICAL, TARGET, INTENT(IN) :: DATA(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: IDX + + SELF%PTR => DATA(:,IDX,:) + SELF%ACTIVE = .TRUE. + SELF%THREAD_BUFFER = .FALSE. + SELF%OWNED = .FALSE. + SELF%NBLOCKS = SIZE(SELF%PTR, 2) + SELF%BASE_PTR => DATA + SELF%FIDX = IDX + END FUNCTION FIELD_LOG2D_WRAP_PACKED + + FUNCTION FIELD_2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) + ! Create FIELD object by explicitly allocating new data + ! + ! Please note that SHAPE is the conceptual shape without the block dimension + TYPE(FIELD_2D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: NBLK + + INTEGER(KIND=JPIM) :: istat, arrsize + type(c_ptr) :: hptr + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + NBLK = OML_MAX_THREADS() + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + ! Adjust outer dim for full-sized persistent blocked arrays + IF (.NOT. PRESENT(NBLOCKS)) CALL & + & ABOR1('FIELD_2D_ALLOCATE : NBLOCKS not given for persistent allocation!') + SELF%THREAD_BUFFER = .FALSE. + NBLK = NBLOCKS + END IF + END IF + + ! Allocate storage array and store metadata + ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) + + arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) + + SELF%PTR => SELF%DATA + SELF%ACTIVE = .TRUE. + SELF%OWNED = .TRUE. + SELF%NBLOCKS = SIZE(SELF%DATA, 2) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_2D_ALLOCATE + + FUNCTION FIELD_3D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) + ! Create FIELD object by explicitly allocating new data + ! + ! Please note that SHAPE is the conceptual shape without the block dimension + TYPE(FIELD_3D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: NBLK + + INTEGER(KIND=JPIM) :: istat, arrsize + type(c_ptr) :: hptr + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + NBLK = OML_MAX_THREADS() + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + ! Adjust outer dim for full-sized persistent blocked arrays + IF (.NOT. PRESENT(NBLOCKS)) CALL & + & ABOR1('FIELD_3D_ALLOCATE : NBLOCKS not given for persistent allocation!') + SELF%THREAD_BUFFER = .FALSE. + NBLK = NBLOCKS + END IF + END IF + + ! Allocate storage array and store metadata + ! ALLOCATE(SELF%DATA(SHAPE(1),SHAPE(2),NBLK)) + + arrsize = SHAPE(1) * SHAPE(2) * NBLK * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + call c_f_pointer(hptr, self%data, [SHAPE(1), SHAPE(2), NBLK] ) + + SELF%PTR => SELF%DATA + SELF%ACTIVE = .TRUE. + SELF%OWNED = .TRUE. + SELF%NBLOCKS = SIZE(SELF%DATA, 3) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_3D_ALLOCATE + + FUNCTION FIELD_4D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) + ! Create FIELD object by explicitly allocating new data + ! + ! Please note that SHAPE is the conceptual shape without the block dimension + TYPE(FIELD_4D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: NBLK + + INTEGER(KIND=JPIM) :: istat, arrsize + type(c_ptr) :: hptr + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + NBLK = OML_MAX_THREADS() + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + ! Adjust outer dim for full-sized persistent blocked arrays + IF (.NOT. PRESENT(NBLOCKS)) CALL & + & ABOR1('FIELD_4D_ALLOCATE : NBLOCKS not given for persistent allocation!') + SELF%THREAD_BUFFER = .FALSE. + NBLK = NBLOCKS + END IF + END IF + + ! Allocate storage array and store metadata + ! ALLOCATE(SELF%DATA(SHAPE(1),SHAPE(2),SHAPE(3),NBLK)) + + arrsize = SHAPE(1) * SHAPE(2) * SHAPE(3) * NBLK * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + call c_f_pointer(hptr, self%data, [SHAPE(1), SHAPE(2), SHAPE(3), NBLK] ) + + SELF%PTR => SELF%DATA + SELF%ACTIVE = .TRUE. + SELF%OWNED = .TRUE. + SELF%NBLOCKS = SIZE(SELF%DATA, 4) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_4D_ALLOCATE + + FUNCTION FIELD_INT2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) + ! Create FIELD object by explicitly allocating new data + ! + ! Please note that SHAPE is the conceptual shape without the block dimension + TYPE(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: NBLK + + INTEGER(KIND=JPIM) :: istat, arrsize + type(c_ptr) :: hptr + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + NBLK = OML_MAX_THREADS() + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + ! Adjust outer dim for full-sized persistent blocked arrays + IF (.NOT. PRESENT(NBLOCKS)) CALL & + & ABOR1('FIELD_INT2D_ALLOCATE : NBLOCKS not given for persistent allocation!') + SELF%THREAD_BUFFER = .FALSE. + NBLK = NBLOCKS + END IF + END IF + + ! Allocate storage array and store metadata + ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) + + arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) + + SELF%PTR => SELF%DATA + SELF%ACTIVE = .TRUE. + SELF%OWNED = .TRUE. + SELF%NBLOCKS = SIZE(SELF%DATA, 2) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_INT2D_ALLOCATE + + FUNCTION FIELD_LOG2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) + ! Create FIELD object by explicitly allocating new data + ! + ! Please note that SHAPE is the conceptual shape without the block dimension + TYPE(FIELD_LOG2D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS + LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT + INTEGER(KIND=JPIM) :: NBLK + + INTEGER(KIND=JPIM) :: istat, arrsize + type(c_ptr) :: hptr + + ! By default we allocate thread-local temporaries + SELF%THREAD_BUFFER = .TRUE. + NBLK = OML_MAX_THREADS() + + IF (PRESENT(PERSISTENT)) THEN + IF (PERSISTENT) THEN + ! Adjust outer dim for full-sized persistent blocked arrays + IF (.NOT. PRESENT(NBLOCKS)) CALL & + & ABOR1('FIELD_LOG2D_ALLOCATE : NBLOCKS not given for persistent allocation!') + SELF%THREAD_BUFFER = .FALSE. + NBLK = NBLOCKS + END IF + END IF + + ! Allocate storage array and store metadata + ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) + + arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) + istat = cudaSetDeviceFlags(cudadevicemaphost) + istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) + call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) + + SELF%PTR => SELF%DATA + SELF%ACTIVE = .TRUE. + SELF%OWNED = .TRUE. + SELF%NBLOCKS = SIZE(SELF%DATA, 2) + SELF%BASE_PTR => NULL() + SELF%FIDX = -1 + END FUNCTION FIELD_LOG2D_ALLOCATE + + FUNCTION FIELD_2D_CLONE(SELF) RESULT(NEWOBJ) + ! Clone (deep-copy) this FIELD object, keeping the DATA pointer + ! intact, but replicating view pointers. + CLASS(FIELD_2D) :: SELF + TYPE(FIELD_2D), POINTER :: NEWOBJ + + ALLOCATE(NEWOBJ) + ! For owned storage data, re-allocate but do not copy data over + IF (SELF%OWNED) THEN + ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) + NEWOBJ%PTR => NEWOBJ%DATA + ELSE + NEWOBJ%PTR => SELF%PTR + END IF + NEWOBJ%VIEW => NULL() + NEWOBJ%NBLOCKS = SELF%NBLOCKS + NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER + NEWOBJ%OWNED = .FALSE. + END FUNCTION FIELD_2D_CLONE + + FUNCTION FIELD_3D_CLONE(SELF) RESULT(NEWOBJ) + ! Clone (deep-copy) this FIELD object, keeping the DATA pointer + ! intact, but replicating view pointers. + CLASS(FIELD_3D) :: SELF + TYPE(FIELD_3D), POINTER :: NEWOBJ + + ALLOCATE(NEWOBJ) + ! For owned storage data, re-allocate but do not copy data over + IF (SELF%OWNED) THEN + ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) + NEWOBJ%PTR => NEWOBJ%DATA + ELSE + NEWOBJ%PTR => SELF%PTR + END IF + NEWOBJ%VIEW => NULL() + NEWOBJ%NBLOCKS = SELF%NBLOCKS + NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER + NEWOBJ%OWNED = .FALSE. + END FUNCTION FIELD_3D_CLONE + + FUNCTION FIELD_4D_CLONE(SELF) RESULT(NEWOBJ) + ! Clone (deep-copy) this FIELD object, keeping the DATA pointer + ! intact, but replicating view pointers. + CLASS(FIELD_4D) :: SELF + TYPE(FIELD_4D), POINTER :: NEWOBJ + + ALLOCATE(NEWOBJ) + ! For owned storage data, re-allocate but do not copy data over + IF (SELF%OWNED) THEN + ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) + NEWOBJ%PTR => NEWOBJ%DATA + ELSE + NEWOBJ%PTR => SELF%PTR + END IF + NEWOBJ%VIEW => NULL() + NEWOBJ%NBLOCKS = SELF%NBLOCKS + NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER + NEWOBJ%OWNED = .FALSE. + END FUNCTION FIELD_4D_CLONE + + FUNCTION FIELD_INT2D_CLONE(SELF) RESULT(NEWOBJ) + ! Clone (deep-copy) this FIELD object, keeping the DATA pointer + ! intact, but replicating view pointers. + CLASS(FIELD_INT2D) :: SELF + TYPE(FIELD_INT2D), POINTER :: NEWOBJ + + ALLOCATE(NEWOBJ) + ! For owned storage data, re-allocate but do not copy data over + IF (SELF%OWNED) THEN + ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) + NEWOBJ%PTR => NEWOBJ%DATA + ELSE + NEWOBJ%PTR => SELF%PTR + END IF + NEWOBJ%VIEW => NULL() + NEWOBJ%NBLOCKS = SELF%NBLOCKS + NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER + NEWOBJ%OWNED = .FALSE. + END FUNCTION FIELD_INT2D_CLONE + + FUNCTION FIELD_LOG2D_CLONE(SELF) RESULT(NEWOBJ) + ! Clone (deep-copy) this FIELD object, keeping the DATA pointer + ! intact, but replicating view pointers. + CLASS(FIELD_LOG2D) :: SELF + TYPE(FIELD_LOG2D), POINTER :: NEWOBJ + + ALLOCATE(NEWOBJ) + ! For owned storage data, re-allocate but do not copy data over + IF (SELF%OWNED) THEN + ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) + NEWOBJ%PTR => NEWOBJ%DATA + ELSE + NEWOBJ%PTR => SELF%PTR + END IF + NEWOBJ%VIEW => NULL() + NEWOBJ%NBLOCKS = SELF%NBLOCKS + NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER + NEWOBJ%OWNED = .FALSE. + END FUNCTION FIELD_LOG2D_CLONE + + + SUBROUTINE FIELD_2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Sets the view pointer FIELD%MP to the block of the given index + CLASS(FIELD_2D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + SELF%VIEW => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + SELF%VIEW => SELF%PTR(:,IDX) + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) SELF%VIEW(:) = 0.0_JPRB + END IF + END SUBROUTINE FIELD_2D_UPDATE_VIEW + + SUBROUTINE FIELD_3D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Sets the view pointer FIELD%MP to the block of the given index + CLASS(FIELD_3D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + SELF%VIEW => SELF%DATA(:,:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + SELF%VIEW => SELF%PTR(:,:,IDX) + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) SELF%VIEW(:,:) = 0.0_JPRB + END IF + END SUBROUTINE FIELD_3D_UPDATE_VIEW + + SUBROUTINE FIELD_4D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Sets the view pointer FIELD%MP to the block of the given index + CLASS(FIELD_4D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + SELF%VIEW => SELF%DATA(:,:,:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + SELF%VIEW => SELF%PTR(:,:,:,IDX) + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) SELF%VIEW(:,:,:) = 0.0_JPRB + END IF + END SUBROUTINE FIELD_4D_UPDATE_VIEW + + SUBROUTINE FIELD_INT2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Sets the view pointer FIELD%MP to the block of the given index + CLASS(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + SELF%VIEW => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + SELF%VIEW => SELF%PTR(:,IDX) + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) SELF%VIEW(:) = 0.0_JPIM + END IF + END SUBROUTINE FIELD_INT2D_UPDATE_VIEW + + + SUBROUTINE FIELD_LOG2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Sets the view pointer FIELD%MP to the block of the given index + CLASS(FIELD_LOG2D), TARGET :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + SELF%VIEW => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + SELF%VIEW => SELF%PTR(:,IDX) + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) SELF%VIEW(:) = .FALSE. + END IF + END SUBROUTINE FIELD_LOG2D_UPDATE_VIEW + + SUBROUTINE FIELD_2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_2D), TARGET :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = 0.0_JPRB + END IF + END SUBROUTINE FIELD_2D_EXTRACT_VIEW + + SUBROUTINE FIELD_3D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_3D), TARGET :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB + END IF + END SUBROUTINE FIELD_3D_EXTRACT_VIEW + + SUBROUTINE FIELD_4D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_4D), TARGET :: SELF + REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,:,:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,:,:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB + END IF + END SUBROUTINE FIELD_4D_EXTRACT_VIEW + + SUBROUTINE FIELD_INT2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = 0.0_JPIM + END IF + END SUBROUTINE FIELD_INT2D_EXTRACT_VIEW + + SUBROUTINE FIELD_LOG2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_LOG2D), TARGET :: SELF + LOGICAL, POINTER, INTENT(INOUT) :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = .FALSE. + END IF + END SUBROUTINE FIELD_LOG2D_EXTRACT_VIEW + + FUNCTION FIELD_2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_2D), TARGET :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE)) THEN + IF (BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = 0.0_JPRB + END IF + END FUNCTION FIELD_2D_GET_VIEW + + FUNCTION FIELD_3D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_3D), TARGET :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE)) THEN + IF (BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB + END IF + END FUNCTION FIELD_3D_GET_VIEW + + FUNCTION FIELD_4D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_4D), TARGET :: SELF + REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,:,:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,:,:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE)) THEN + IF (BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB + END IF + END FUNCTION FIELD_4D_GET_VIEW + + FUNCTION FIELD_INT2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE)) THEN + IF (BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = 0.0_JPIM + END IF + END FUNCTION FIELD_INT2D_GET_VIEW + + + FUNCTION FIELD_LOG2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) + ! Updates internal view and exports it to an external pointer + CLASS(FIELD_LOG2D), TARGET :: SELF + LOGICAL, POINTER :: VIEW_PTR(:) + INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE + LOGICAL, OPTIONAL, INTENT(IN) :: ZERO + INTEGER(KIND=JPIM) :: IDX + + IDX = BLOCK_INDEX + IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() + IF (SELF%ACTIVE .AND. SELF%OWNED) THEN + VIEW_PTR => SELF%DATA(:,IDX) + ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN + VIEW_PTR => SELF%PTR(:,IDX) + ELSE + VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer + END IF + + IF (PRESENT(BLOCK_SIZE)) THEN + IF (BLOCK_INDEX == SELF%NBLOCKS) THEN + ! Fill the the buffer by replicating the last entry + CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) + END IF + END IF + + IF (PRESENT(ZERO)) THEN + IF (ZERO) VIEW_PTR(:) = .FALSE. + END IF + END FUNCTION FIELD_LOG2D_GET_VIEW + + + SUBROUTINE FIELD_2D_CREATE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_2D), TARGET :: SELF + + SELF%DEVPTR => SELF%DATA + !$acc enter data create(SELF%DATA) + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_2D_CREATE_DEVICE + + SUBROUTINE FIELD_3D_CREATE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_3D), TARGET :: SELF + INTEGER(KIND=JPIM) :: ARRSIZE + + ARRSIZE = SIZE(SELF%PTR) * SIZEOF(1.0_JPRB) + ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) + CALL ACC_MAP_DATA(SELF%PTR, SELF%DEVDATA, ARRSIZE) + + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_3D_CREATE_DEVICE + + SUBROUTINE FIELD_4D_CREATE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_4D), TARGET :: SELF + + SELF%DEVPTR => SELF%DATA + !$acc enter data create(SELF%DATA) + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_4D_CREATE_DEVICE + + SUBROUTINE FIELD_INT2D_CREATE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_INT2D), TARGET :: SELF + + SELF%DEVPTR => SELF%DATA + !$acc enter data create(SELF%DATA) + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_INT2D_CREATE_DEVICE + + SUBROUTINE FIELD_LOG2D_CREATE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_LOG2D), TARGET :: SELF + + SELF%DEVPTR => SELF%DATA + !$acc enter data create(SELF%DATA) + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_LOG2D_CREATE_DEVICE + + FUNCTION FIELD_2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_2D), TARGET :: SELF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + + IF (SELF%OWNED) THEN + DEVPTR => SELF%DATA + ELSE + DEVPTR => SELF%DEVPTR + END IF + END FUNCTION FIELD_2D_GET_DEVICE_DATA + + FUNCTION FIELD_3D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_3D), TARGET :: SELF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) + + type(c_ptr) :: hptr + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + + IF (SELF%OWNED) THEN + DEVPTR => SELF%DATA + ELSE + hptr = acc_hostptr(self%devdata) + call c_f_pointer(hptr, devptr, shape(self%devdata)) + END IF + END FUNCTION FIELD_3D_GET_DEVICE_DATA + + FUNCTION FIELD_4D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_4D), TARGET :: SELF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + + IF (SELF%OWNED) THEN + DEVPTR => SELF%DATA + ELSE + DEVPTR => SELF%DEVPTR + END IF + END FUNCTION FIELD_4D_GET_DEVICE_DATA + + FUNCTION FIELD_INT2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + + IF (SELF%OWNED) THEN + DEVPTR => SELF%DATA + ELSE + DEVPTR => SELF%DEVPTR + END IF + END FUNCTION FIELD_INT2D_GET_DEVICE_DATA + + FUNCTION FIELD_LOG2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_LOG2D), TARGET :: SELF + LOGICAL, POINTER, CONTIGUOUS :: DEVPTR(:,:) + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + + IF (SELF%OWNED) THEN + DEVPTR => SELF%DATA + ELSE + DEVPTR => SELF%DEVPTR + END IF + END FUNCTION FIELD_LOG2D_GET_DEVICE_DATA + + SUBROUTINE FIELD_2D_UPDATE_DEVICE(SELF) + ! Create a copy of this field on device and copy data over + CLASS(FIELD_2D), TARGET :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + !$acc enter data create(SELF%DATA) + !$acc update device(SELF%DATA(:,:)) + !$acc wait + SELF%DEVPTR => SELF%DATA + ELSE + ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) + !$acc enter data create(SELF%DEVPTR) + DO IBL=1, SELF%NBLOCKS + !$acc update device(SELF%DEVPTR(:,IBL)) + END DO + !$acc wait + END IF + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_2D_UPDATE_DEVICE + + SUBROUTINE FIELD_3D_UPDATE_DEVICE(SELF) + ! Create a copy of this field on device and copy data over + CLASS(FIELD_3D), TARGET :: SELF + INTEGER(KIND=JPIM) :: IBL + + INTEGER(KIND=JPIM) :: istat, arrsize, blksize + type(c_ptr) :: hptr + integer(kind=jpim) :: shape(3) + + logical :: pres + + arrsize = size(self%ptr) * sizeof(1.0_JPRB) + blksize = arrsize / self%nblocks + ALLOCATE(SELF%DEVDATA, mold=SELF%PTR) + + IF (SELF%OWNED) THEN + call acc_map_data(self%data, self%devdata, arrsize) + call acc_memcpy_to_device(self%devdata(:,:,:), self%data(:,:,:), arrsize) + + ELSE + ! TODO: This is a dirty trick to fool the OpenACC runtime! + ! We allocate the associated data array (full size), so that we can + ! add it to the OpenACC host-device map (it's contiguous!) + ! Then, we copy the data in a strided fashio from the discontiguous pointer. + ALLOCATE(SELF%DATA, MOLD=SELF%PTR) + call acc_map_data(self%data, self%devdata, arrsize) + DO IBL=1, SELF%NBLOCKS + call acc_memcpy_to_device(self%devdata(:,:,ibl), self%base_ptr(:,:,self%fidx,ibl), blksize) + END DO + END IF + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_3D_UPDATE_DEVICE + + SUBROUTINE FIELD_4D_UPDATE_DEVICE(SELF) + ! Create a copy of this field on device and copy data over + CLASS(FIELD_4D), TARGET :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + !$acc enter data create(SELF%DATA) + !$acc update device(SELF%DATA(:,:,:,:)) + !$acc wait + SELF%DEVPTR => SELF%DATA + ELSE + ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) + !$acc enter data create(SELF%DEVPTR) + DO IBL=1, SELF%NBLOCKS + !$acc update device(SELF%DEVPTR(:,:,:,IBL)) + END DO + !$acc wait + END IF + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_4D_UPDATE_DEVICE + + SUBROUTINE FIELD_INT2D_UPDATE_DEVICE(SELF) + ! Create a copy of this field on device and copy data over + CLASS(FIELD_INT2D), TARGET :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + !$acc enter data create(SELF%DATA) + !$acc update device(SELF%DATA(:,:)) + !$acc wait + SELF%DEVPTR => SELF%DATA + ELSE + ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) + !$acc enter data create(SELF%DEVPTR) + DO IBL=1, SELF%NBLOCKS + !$acc update device(SELF%DEVPTR(:,IBL)) + END DO + !$acc wait + END IF + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE + + SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE(SELF) + ! Create a copy of this field on device and copy data over + CLASS(FIELD_LOG2D), TARGET :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + !$acc enter data create(SELF%DATA) + !$acc update device(SELF%DATA(:,:)) + !$acc wait + SELF%DEVPTR => SELF%DATA + ELSE + ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) + !$acc enter data create(SELF%DEVPTR) + DO IBL=1, SELF%NBLOCKS + !$acc update device(SELF%DEVPTR(:,IBL)) + END DO + !$acc wait + END IF + SELF%ON_DEVICE = .TRUE. + END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE + + SUBROUTINE FIELD_2D_UPDATE_HOST(SELF) + ! Synchronize device data back to host + CLASS(FIELD_2D) :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + !$acc update host(SELF%DATA(:,:)) + !$acc wait + !$acc exit data delete(SELF%DATA) + ELSE + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DEVPTR(:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DEVPTR) + SELF%PTR(:,:) = SELF%DEVPTR(:,:) + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_2D_UPDATE_HOST + + SUBROUTINE FIELD_3D_UPDATE_HOST(SELF) + ! Synchronize device data back to host + CLASS(FIELD_3D) :: SELF + INTEGER(KIND=JPIM) :: IBL + + INTEGER(KIND=JPIM) :: istat, arrsize, blksize + type(c_ptr) :: hptr + + arrsize = size(self%ptr) * sizeof(1.0_JPRB) + blksize = arrsize / self%nblocks + + IF (SELF%OWNED) THEN + call acc_memcpy_from_device(self%data(:,:,:), self%devdata(:,:,:), arrsize) + call acc_unmap_data(self%data) + + ELSE + ! call acc_memcpy_from_device(self%data(:,:,:), self%devdata(:,:,:), arrsize) + DO IBL=1, SELF%NBLOCKS + ! self%base_ptr(:,:,self%fidx,ibl) = self%data(:,:,ibl) + + ! call acc_memcpy_from_device(self%ptr(:,:,ibl), self%devdata(:,:,ibl), blksize) + call acc_memcpy_from_device(self%base_ptr(:,:,self%fidx,ibl), self%devdata(:,:,ibl), blksize) + END DO + call acc_unmap_data(self%data) + DEALLOCATE(SELF%DATA) + END IF + + DEALLOCATE(SELF%DEVDATA) + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_3D_UPDATE_HOST + + SUBROUTINE FIELD_4D_UPDATE_HOST(SELF) + ! Synchronize device data back to host + CLASS(FIELD_4D) :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DATA(:,:,:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DATA) + ELSE + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DEVPTR(:,:,:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DEVPTR) + SELF%PTR(:,:,:,:) = SELF%DEVPTR(:,:,:,:) + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_4D_UPDATE_HOST + + SUBROUTINE FIELD_INT2D_UPDATE_HOST(SELF) + ! Synchronize device data back to host + CLASS(FIELD_INT2D) :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DATA(:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DATA) + ELSE + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DEVPTR(:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DEVPTR) + SELF%PTR(:,:) = SELF%DEVPTR(:,:) + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_INT2D_UPDATE_HOST + + SUBROUTINE FIELD_LOG2D_UPDATE_HOST(SELF) + ! Synchronize device data back to host + CLASS(FIELD_LOG2D) :: SELF + INTEGER(KIND=JPIM) :: IBL + + IF (SELF%OWNED) THEN + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DATA(:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DATA) + ELSE + DO IBL=1, SELF%NBLOCKS + !$acc update host(SELF%DEVPTR(:,IBL)) + END DO + !$acc wait + !$acc exit data delete(SELF%DEVPTR) + SELF%PTR(:,:) = SELF%DEVPTR(:,:) + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_LOG2D_UPDATE_HOST + + SUBROUTINE FIELD_2D_DELETE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_2D), TARGET :: SELF + + !$acc exit data delete(SELF%DEVPTR) + IF (SELF%OWNED) THEN + NULLIFY(SELF%DEVPTR) + ELSE + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_2D_DELETE_DEVICE + + SUBROUTINE FIELD_3D_DELETE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_3D), TARGET :: SELF + + IF (SELF%OWNED) THEN + CALL ACC_UNMAP_DATA(SELF%DATA) + ELSE + CALL ACC_UNMAP_DATA(SELF%PTR) + END IF + DEALLOCATE(SELF%DEVDATA) + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_3D_DELETE_DEVICE + + SUBROUTINE FIELD_4D_DELETE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_4D), TARGET :: SELF + + !$acc exit data delete(SELF%DEVPTR) + IF (SELF%OWNED) THEN + NULLIFY(SELF%DEVPTR) + ELSE + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_4D_DELETE_DEVICE + + SUBROUTINE FIELD_INT2D_DELETE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_INT2D), TARGET :: SELF + + !$acc exit data delete(SELF%DEVPTR) + IF (SELF%OWNED) THEN + NULLIFY(SELF%DEVPTR) + ELSE + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_INT2D_DELETE_DEVICE + + SUBROUTINE FIELD_LOG2D_DELETE_DEVICE(SELF) + ! Initialize a copy of this field on GPU device + CLASS(FIELD_LOG2D), TARGET :: SELF + + !$acc exit data delete(SELF%DEVPTR) + IF (SELF%OWNED) THEN + NULLIFY(SELF%DEVPTR) + ELSE + DEALLOCATE(SELF%DEVPTR) + END IF + SELF%ON_DEVICE = .FALSE. + END SUBROUTINE FIELD_LOG2D_DELETE_DEVICE + + SUBROUTINE FIELD_2D_ENSURE_HOST(SELF) + ! Ensure that field has been moved back to host + CLASS(FIELD_2D), TARGET :: SELF + + IF (SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_HOST() + END IF + END SUBROUTINE FIELD_2D_ENSURE_HOST + + SUBROUTINE FIELD_3D_ENSURE_HOST(SELF) + ! Ensure that field has been moved back to host + CLASS(FIELD_3D), TARGET :: SELF + + IF (SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_HOST() + END IF + END SUBROUTINE FIELD_3D_ENSURE_HOST + + SUBROUTINE FIELD_4D_ENSURE_HOST(SELF) + ! Ensure that field has been moved back to host + CLASS(FIELD_4D), TARGET :: SELF + + IF (SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_HOST() + END IF + END SUBROUTINE FIELD_4D_ENSURE_HOST + + SUBROUTINE FIELD_INT2D_ENSURE_HOST(SELF) + ! Ensure that field has been moved back to host + CLASS(FIELD_INT2D), TARGET :: SELF + + IF (SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_HOST() + END IF + END SUBROUTINE FIELD_INT2D_ENSURE_HOST + + SUBROUTINE FIELD_LOG2D_ENSURE_HOST(SELF) + ! Ensure that field has been moved back to host + CLASS(FIELD_LOG2D), TARGET :: SELF + + IF (SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_HOST() + END IF + END SUBROUTINE FIELD_LOG2D_ENSURE_HOST + + SUBROUTINE FIELD_2D_ENSURE_DEVICE(SELF) + ! Ensure that field has been moved over to device + CLASS(FIELD_2D), TARGET :: SELF + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + END SUBROUTINE FIELD_2D_ENSURE_DEVICE + + SUBROUTINE FIELD_3D_ENSURE_DEVICE(SELF) + ! Ensure that field has been moved over to device + CLASS(FIELD_3D), TARGET :: SELF + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + END SUBROUTINE FIELD_3D_ENSURE_DEVICE + + SUBROUTINE FIELD_4D_ENSURE_DEVICE(SELF) + ! Ensure that field has been moved over to device + CLASS(FIELD_4D), TARGET :: SELF + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + END SUBROUTINE FIELD_4D_ENSURE_DEVICE + + SUBROUTINE FIELD_INT2D_ENSURE_DEVICE(SELF) + ! Ensure that field has been moved over to device + CLASS(FIELD_INT2D), TARGET :: SELF + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + END SUBROUTINE FIELD_INT2D_ENSURE_DEVICE + + SUBROUTINE FIELD_LOG2D_ENSURE_DEVICE(SELF) + ! Ensure that field has been moved over to device + CLASS(FIELD_LOG2D), TARGET :: SELF + + IF (.NOT. SELF%ON_DEVICE) THEN + CALL SELF%UPDATE_DEVICE() + END IF + END SUBROUTINE FIELD_LOG2D_ENSURE_DEVICE + + SUBROUTINE FIELD_2D_FINAL(SELF) + ! Finalizes field and dealloactes owned data + CLASS(FIELD_2D) :: SELF + IF (SELF%OWNED) THEN + DEALLOCATE(SELF%DATA) + END IF + NULLIFY(SELF%PTR) + NULLIFY(SELF%VIEW) + END SUBROUTINE FIELD_2D_FINAL + + SUBROUTINE FIELD_3D_FINAL(SELF) + ! Finalizes field and dealloactes owned data + CLASS(FIELD_3D) :: SELF + IF (SELF%OWNED) THEN + DEALLOCATE(SELF%DATA) + END IF + NULLIFY(SELF%PTR) + NULLIFY(SELF%VIEW) + END SUBROUTINE FIELD_3D_FINAL + + SUBROUTINE FIELD_4D_FINAL(SELF) + ! Finalizes field and dealloactes owned data + CLASS(FIELD_4D) :: SELF + IF (SELF%OWNED) THEN + DEALLOCATE(SELF%DATA) + END IF + NULLIFY(SELF%PTR) + NULLIFY(SELF%VIEW) + END SUBROUTINE FIELD_4D_FINAL + + SUBROUTINE FIELD_INT2D_FINAL(SELF) + ! Finalizes field and dealloactes owned data + CLASS(FIELD_INT2D) :: SELF + IF (SELF%OWNED) THEN + DEALLOCATE(SELF%DATA) + END IF + NULLIFY(SELF%PTR) + NULLIFY(SELF%VIEW) + END SUBROUTINE FIELD_INT2D_FINAL + + SUBROUTINE FIELD_LOG2D_FINAL(SELF) + ! Finalizes field and dealloactes owned data + CLASS(FIELD_LOG2D) :: SELF + IF (SELF%OWNED) THEN + DEALLOCATE(SELF%DATA) + END IF + NULLIFY(SELF%PTR) + NULLIFY(SELF%VIEW) + END SUBROUTINE FIELD_LOG2D_FINAL + +END MODULE FIELD_MODULE diff --git a/src/common/module/yomphyder.F90 b/src/common/module/yomphyder.F90 index 116a24b4..afb1d454 100644 --- a/src/common/module/yomphyder.F90 +++ b/src/common/module/yomphyder.F90 @@ -37,8 +37,8 @@ module yomphyder !REAL(KIND=JPRB), dimension(:,:), pointer :: qsat ! spec. humidity at saturation #ifdef USE_FIELD_API - TYPE(FIELD_3D), POINTER :: F_T, F_A, F_Q - TYPE(FIELD_4D), POINTER :: F_CLD + CLASS(FIELD_3D), POINTER :: F_T, F_A, F_Q + CLASS(FIELD_4D), POINTER :: F_CLD #endif end type state_type From 9f2752540971707064d581aa94c414af82a50ab0 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 24 Oct 2022 11:13:10 +0000 Subject: [PATCH 002/174] Standalone FIELD_API repo added to bundle --- CMakeLists.txt | 7 + bundle.yml | 25 +- src/cloudsc_gpu/CMakeLists.txt | 2 +- src/common/CMakeLists.txt | 13 +- src/common/module/field_module.F90 | 6129 ----------------------- src/common/module/field_module_orig.F90 | 2189 -------- src/common/module/yomphyder.F90 | 2 +- 7 files changed, 34 insertions(+), 8333 deletions(-) delete mode 100644 src/common/module/field_module.F90 delete mode 100644 src/common/module/field_module_orig.F90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9eafd77e..18489907 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,13 @@ if( HAVE_SERIALBOX ) list(APPEND CLOUDSC_DEFINITIONS HAVE_SERIALBOX) endif() +# Add field_api library to manage data fields +ecbuild_add_option( FEATURE FIELD_API + DESCRIPTION "Use field_api to manage GPU data offload and copyback" + REQUIRED_PACKAGES "field_api" + CONDITION HAVE_CUDA + DEFAULT OFF ) + ecbuild_find_package( NAME loki ) # Add option for single-precision builds diff --git a/bundle.yml b/bundle.yml index ef5a8640..0784b76e 100644 --- a/bundle.yml +++ b/bundle.yml @@ -9,8 +9,8 @@ cmake : > projects : - ecbuild : - git : https://github.com/ecmwf/ecbuild - version : 3.6.4 + git : https://github.com/ecmwf/ecbuild + version : 3.7.0 bundle : false - serialbox : @@ -36,11 +36,21 @@ projects : LOKI_ENABLE_TESTS=OFF LOKI_ENABLE_NO_INSTALL=ON + - field_api : + git : ${BITBUCKET}/rdx/field_api + version : master + optional: true + require : ecbuild + cmake : > + ENABLE_FIELD_API_TESTS=OFF + ENABLE_FIELD_API_FIAT_BUILD=OFF + FIELD_API_UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module + - cloudsc-dwarf : # The CLOUDSC dwarf project with multiple implementations dir : $PWD version : develop - require : ecbuild serialbox loki + require : ecbuild serialbox loki field_api options : @@ -66,6 +76,7 @@ options : ENABLE_CLOUDSC_GPU_SCC_CUF=ON ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON ENABLE_CLOUDSC_GPU_SCC_FIELD=ON + ENABLE_FIELD_API=ON - with-mpi : help : Enable MPI-parallel kernel @@ -94,6 +105,14 @@ options : help : Frontend parser to use for Loki transformations cmake : LOKI_FRONTEND={{value}} + - field_api-old-copy : + help : Use COPY for GPU offload rather than UPDATE_DEVICE/UPDATE_HOST + cmake : ENABLE_FIELD_API_OLD_COPY=ON + + - field_api-testing : + help : Enable field_api testing + cmake : ENABLE_FIELD_API_TESTS=ON + - cloudsc-prototype1 : help : Build the original operational Fortran prototype [ON|OFF] cmake : ENABLE_CLOUDSC_PROTOTYPE1={{value}} diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt index b4a482c2..a6c2e4cf 100644 --- a/src/cloudsc_gpu/CMakeLists.txt +++ b/src/cloudsc_gpu/CMakeLists.txt @@ -45,7 +45,7 @@ ecbuild_add_option( FEATURE CLOUDSC_GPU_OMP_SCC_HOIST ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC_FIELD DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with FIELD API" DEFAULT OFF - CONDITION HAVE_CUDA AND ( Serialbox_FOUND OR HDF5_FOUND ) + CONDITION HAVE_FIELD_API AND ( Serialbox_FOUND OR HDF5_FOUND ) ) diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index f96d6bbd..47fc8214 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -40,12 +40,6 @@ list(APPEND CLOUDSC_CUDA_SOURCES module/fcttre_mod.cuf.F90 module/yoethf.cuf.F90 module/yomcst.cuf.F90 - module/field_module.F90 -) - -list(APPEND CLOUDSC_FIELD_SOURCES - module/field_module.F90 - module/cloudsc_field_state_mod.F90 ) if( HAVE_CUDA ) @@ -63,9 +57,6 @@ if( HAVE_CUDA ) # Add CUDA-specific flags to the library if enabled list(APPEND CLOUDSC_COMMON_SOURCES ${CLOUDSC_CUDA_SOURCES} ) - - # If CUDA is enabled, we can also compile the FIELD API utilities - list(APPEND CLOUDSC_COMMON_SOURCES ${CLOUDSC_FIELD_SOURCES} ) endif() @@ -85,11 +76,12 @@ ecbuild_add_library( TARGET cloudsc-common-lib TYPE ${LIBRARY_TYPE} DEFINITIONS ${CLOUDSC_DEFINITIONS} - $<${HAVE_CUDA}:USE_FIELD_API> + $<${HAVE_FIELD_API}:USE_FIELD_API> SOURCES ${CLOUDSC_COMMON_SOURCES} $<${HAVE_MPI}:module/cloudsc_mpif.F90> $<${HAVE_HDF5}:module/hdf5_file_mod.F90> + $<${HAVE_FIELD_API}:module/cloudsc_field_state_mod.F90> PRIVATE_INCLUDES $<${HAVE_HDF5}:${HDF5_Fortran_INCLUDE_DIRS}> PUBLIC_INCLUDES @@ -102,6 +94,7 @@ ecbuild_add_library( TARGET cloudsc-common-lib $<${HAVE_MPI}:MPI::MPI_Fortran> $<${HAVE_HDF5}:hdf5::hdf5_fortran> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_Fortran> + $<${HAVE_FIELD_API}:field_api> ) if( HAVE_CUDA ) diff --git a/src/common/module/field_module.F90 b/src/common/module/field_module.F90 deleted file mode 100644 index ca1e3101..00000000 --- a/src/common/module/field_module.F90 +++ /dev/null @@ -1,6129 +0,0 @@ -! Rank and shape definitions for simple templating -! -! Note that the ranks encode coneptual dimensions here, eg. FIELD_2D encodes -! a surface field and FIELD_3D represents a field with a vertical component. - -MODULE FIELD_MODULE - ! The FIELD types provided by this module provide data abstractions that - ! decouple data storage in memory from the data views used in thread-parallel - ! sections of the code. They are intended to thinly wrap ATLAS_FIELD - ! objects and provide additional features that may later be - ! incorporated into Atlas. They can also provide backward-compatibility - ! for non-Atlas execution modes. - -USE PARKIND1, ONLY: JPIM, JPRB, JPLM -USE OML_MOD, ONLY: OML_MAX_THREADS, OML_MY_THREAD -USE IEEE_ARITHMETIC, ONLY: IEEE_SIGNALING_NAN - -USE CUDAFOR -USE ISO_C_BINDING -USE OPENACC - -IMPLICIT NONE - -INTEGER (KIND=JPIM), PARAMETER :: NDEVFRESH = INT(B'00000001', JPIM), NHSTFRESH = INT(B'00000010', JPIM) -INTEGER (KIND=JPIM), PARAMETER, PRIVATE :: NH2D = 1, ND2H = 2, NRD = INT(B'00000001', JPIM), NWR = INT(B'00000010', JPIM) - -TYPE GPU_STATS - INTEGER :: TRANSFER_CPU_TO_GPU = 0 - INTEGER :: TRANSFER_GPU_TO_CPU = 0 - REAL :: TOTAL_TIME_TRANSFER_CPU_TO_GPU = 0 - REAL :: TOTAL_TIME_TRANSFER_GPU_TO_CPU = 0 - CONTAINS - PROCEDURE :: INC_CPU_TO_GPU_TRANSFER - PROCEDURE :: INC_GPU_TO_CPU_TRANSFER -END TYPE GPU_STATS - -TYPE, ABSTRACT :: FIELD_2D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_2D_FINAL - PROCEDURE :: FIELD_2D_FINAL - PROCEDURE :: GET_VIEW => FIELD_2D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_2D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_2D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_2D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_2D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_2D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_2D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_2D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_2D_GET_HOST_DATA -END TYPE FIELD_2D - -TYPE, EXTENDS(FIELD_2D) :: FIELD_2D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_2D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_2D_WRAPPER_FINAL -END TYPE FIELD_2D_WRAPPER - -TYPE, EXTENDS(FIELD_2D) :: FIELD_2D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_2D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_2D_OWNER_FINAL -END TYPE FIELD_2D_OWNER - -TYPE, EXTENDS(FIELD_2D) :: FIELD_2D_WRAPPER_PACKED - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_2D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_2D_WRAPPER_PACKED_FINAL -END TYPE FIELD_2D_WRAPPER_PACKED - -TYPE FIELD_2D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_2D), POINTER :: PTR => NULL() -END TYPE FIELD_2D_PTR - -TYPE FIELD_2D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:) => NULL() -END TYPE FIELD_2D_VIEW - -TYPE, ABSTRACT :: FIELD_3D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_3D_FINAL - PROCEDURE :: FIELD_3D_FINAL - PROCEDURE :: GET_VIEW => FIELD_3D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_3D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_3D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_3D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_3D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_3D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_3D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_3D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_3D_GET_HOST_DATA -END TYPE FIELD_3D - -TYPE, EXTENDS(FIELD_3D) :: FIELD_3D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_3D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_3D_WRAPPER_FINAL -END TYPE FIELD_3D_WRAPPER - -TYPE, EXTENDS(FIELD_3D) :: FIELD_3D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_3D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_3D_OWNER_FINAL -END TYPE FIELD_3D_OWNER - -TYPE, EXTENDS(FIELD_3D) :: FIELD_3D_WRAPPER_PACKED - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_3D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_3D_WRAPPER_PACKED_FINAL -END TYPE FIELD_3D_WRAPPER_PACKED - -TYPE FIELD_3D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_3D), POINTER :: PTR => NULL() -END TYPE FIELD_3D_PTR - -TYPE FIELD_3D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:) => NULL() -END TYPE FIELD_3D_VIEW - -TYPE, ABSTRACT :: FIELD_4D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_4D_FINAL - PROCEDURE :: FIELD_4D_FINAL - PROCEDURE :: GET_VIEW => FIELD_4D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_4D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_4D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_4D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_4D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_4D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_4D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_4D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_4D_GET_HOST_DATA -END TYPE FIELD_4D - -TYPE, EXTENDS(FIELD_4D) :: FIELD_4D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_4D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_4D_WRAPPER_FINAL -END TYPE FIELD_4D_WRAPPER - -TYPE, EXTENDS(FIELD_4D) :: FIELD_4D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_4D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_4D_OWNER_FINAL -END TYPE FIELD_4D_OWNER - -TYPE, EXTENDS(FIELD_4D) :: FIELD_4D_WRAPPER_PACKED - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_4D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_4D_WRAPPER_PACKED_FINAL -END TYPE FIELD_4D_WRAPPER_PACKED - -TYPE FIELD_4D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_4D), POINTER :: PTR => NULL() -END TYPE FIELD_4D_PTR - -TYPE FIELD_4D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:,:) => NULL() -END TYPE FIELD_4D_VIEW - -TYPE, ABSTRACT :: FIELD_5D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:,:) => NULL() - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_5D_FINAL - PROCEDURE :: FIELD_5D_FINAL - PROCEDURE :: GET_VIEW => FIELD_5D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_5D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_5D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_5D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_5D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_5D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_5D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_5D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_5D_GET_HOST_DATA -END TYPE FIELD_5D - -TYPE, EXTENDS(FIELD_5D) :: FIELD_5D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_5D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_5D_WRAPPER_FINAL -END TYPE FIELD_5D_WRAPPER - -TYPE, EXTENDS(FIELD_5D) :: FIELD_5D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_5D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_5D_OWNER_FINAL -END TYPE FIELD_5D_OWNER - -TYPE, EXTENDS(FIELD_5D) :: FIELD_5D_WRAPPER_PACKED - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_5D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_5D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_5D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_5D_WRAPPER_PACKED_FINAL -END TYPE FIELD_5D_WRAPPER_PACKED - -TYPE FIELD_5D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_5D), POINTER :: PTR => NULL() -END TYPE FIELD_5D_PTR - -TYPE FIELD_5D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:,:,:) => NULL() -END TYPE FIELD_5D_VIEW - -TYPE, ABSTRACT :: FIELD_INT2D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - INTEGER(KIND=JPIM), POINTER :: PTR(:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_INT2D_FINAL - PROCEDURE :: FIELD_INT2D_FINAL - PROCEDURE :: GET_VIEW => FIELD_INT2D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT2D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT2D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT2D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT2D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_INT2D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_INT2D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT2D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT2D_GET_HOST_DATA -END TYPE FIELD_INT2D - -TYPE, EXTENDS(FIELD_INT2D) :: FIELD_INT2D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_INT2D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_INT2D_WRAPPER_FINAL -END TYPE FIELD_INT2D_WRAPPER - -TYPE, EXTENDS(FIELD_INT2D) :: FIELD_INT2D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_INT2D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_INT2D_OWNER_FINAL -END TYPE FIELD_INT2D_OWNER - -TYPE, EXTENDS(FIELD_INT2D) :: FIELD_INT2D_WRAPPER_PACKED - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_INT2D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_INT2D_WRAPPER_PACKED_FINAL -END TYPE FIELD_INT2D_WRAPPER_PACKED - -TYPE FIELD_INT2D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_INT2D), POINTER :: PTR => NULL() -END TYPE FIELD_INT2D_PTR - -TYPE FIELD_INT2D_VIEW - ! Struct to hold array views, so we can make arrays of them - INTEGER(KIND=JPIM), POINTER :: P(:) => NULL() -END TYPE FIELD_INT2D_VIEW - -TYPE, ABSTRACT :: FIELD_INT3D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_INT3D_FINAL - PROCEDURE :: FIELD_INT3D_FINAL - PROCEDURE :: GET_VIEW => FIELD_INT3D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT3D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT3D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT3D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT3D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_INT3D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_INT3D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT3D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT3D_GET_HOST_DATA -END TYPE FIELD_INT3D - -TYPE, EXTENDS(FIELD_INT3D) :: FIELD_INT3D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_INT3D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_INT3D_WRAPPER_FINAL -END TYPE FIELD_INT3D_WRAPPER - -TYPE, EXTENDS(FIELD_INT3D) :: FIELD_INT3D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_INT3D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_INT3D_OWNER_FINAL -END TYPE FIELD_INT3D_OWNER - -TYPE, EXTENDS(FIELD_INT3D) :: FIELD_INT3D_WRAPPER_PACKED - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_INT3D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT3D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT3D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_INT3D_WRAPPER_PACKED_FINAL -END TYPE FIELD_INT3D_WRAPPER_PACKED - -TYPE FIELD_INT3D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_INT3D), POINTER :: PTR => NULL() -END TYPE FIELD_INT3D_PTR - -TYPE FIELD_INT3D_VIEW - ! Struct to hold array views, so we can make arrays of them - INTEGER(KIND=JPIM), POINTER :: P(:,:) => NULL() -END TYPE FIELD_INT3D_VIEW - -TYPE, ABSTRACT :: FIELD_INT4D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_INT4D_FINAL - PROCEDURE :: FIELD_INT4D_FINAL - PROCEDURE :: GET_VIEW => FIELD_INT4D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT4D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT4D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT4D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT4D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_INT4D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_INT4D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT4D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT4D_GET_HOST_DATA -END TYPE FIELD_INT4D - -TYPE, EXTENDS(FIELD_INT4D) :: FIELD_INT4D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_INT4D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_INT4D_WRAPPER_FINAL -END TYPE FIELD_INT4D_WRAPPER - -TYPE, EXTENDS(FIELD_INT4D) :: FIELD_INT4D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_INT4D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_INT4D_OWNER_FINAL -END TYPE FIELD_INT4D_OWNER - -TYPE, EXTENDS(FIELD_INT4D) :: FIELD_INT4D_WRAPPER_PACKED - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_INT4D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT4D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT4D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_INT4D_WRAPPER_PACKED_FINAL -END TYPE FIELD_INT4D_WRAPPER_PACKED - -TYPE FIELD_INT4D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_INT4D), POINTER :: PTR => NULL() -END TYPE FIELD_INT4D_PTR - -TYPE FIELD_INT4D_VIEW - ! Struct to hold array views, so we can make arrays of them - INTEGER(KIND=JPIM), POINTER :: P(:,:,:) => NULL() -END TYPE FIELD_INT4D_VIEW - -TYPE, ABSTRACT :: FIELD_INT5D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:,:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - INTEGER(KIND=JPIM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_INT5D_FINAL - PROCEDURE :: FIELD_INT5D_FINAL - PROCEDURE :: GET_VIEW => FIELD_INT5D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_INT5D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_INT5D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_INT5D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_INT5D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_INT5D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_INT5D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_INT5D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_INT5D_GET_HOST_DATA -END TYPE FIELD_INT5D - -TYPE, EXTENDS(FIELD_INT5D) :: FIELD_INT5D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_INT5D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_INT5D_WRAPPER_FINAL -END TYPE FIELD_INT5D_WRAPPER - -TYPE, EXTENDS(FIELD_INT5D) :: FIELD_INT5D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_INT5D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_INT5D_OWNER_FINAL -END TYPE FIELD_INT5D_OWNER - -TYPE, EXTENDS(FIELD_INT5D) :: FIELD_INT5D_WRAPPER_PACKED - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_INT5D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_INT5D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_INT5D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_INT5D_WRAPPER_PACKED_FINAL -END TYPE FIELD_INT5D_WRAPPER_PACKED - -TYPE FIELD_INT5D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_INT5D), POINTER :: PTR => NULL() -END TYPE FIELD_INT5D_PTR - -TYPE FIELD_INT5D_VIEW - ! Struct to hold array views, so we can make arrays of them - INTEGER(KIND=JPIM), POINTER :: P(:,:,:,:) => NULL() -END TYPE FIELD_INT5D_VIEW - -TYPE, ABSTRACT :: FIELD_LOG2D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_LOG2D_FINAL - PROCEDURE :: FIELD_LOG2D_FINAL - PROCEDURE :: GET_VIEW => FIELD_LOG2D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG2D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG2D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG2D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG2D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_LOG2D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_LOG2D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG2D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG2D_GET_HOST_DATA -END TYPE FIELD_LOG2D - -TYPE, EXTENDS(FIELD_LOG2D) :: FIELD_LOG2D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_LOG2D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_LOG2D_WRAPPER_FINAL -END TYPE FIELD_LOG2D_WRAPPER - -TYPE, EXTENDS(FIELD_LOG2D) :: FIELD_LOG2D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_LOG2D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_LOG2D_OWNER_FINAL -END TYPE FIELD_LOG2D_OWNER - -TYPE, EXTENDS(FIELD_LOG2D) :: FIELD_LOG2D_WRAPPER_PACKED - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_LOG2D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_LOG2D_WRAPPER_PACKED_FINAL -END TYPE FIELD_LOG2D_WRAPPER_PACKED - -TYPE FIELD_LOG2D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_LOG2D), POINTER :: PTR => NULL() -END TYPE FIELD_LOG2D_PTR - -TYPE FIELD_LOG2D_VIEW - ! Struct to hold array views, so we can make arrays of them - LOGICAL(KIND=JPLM), POINTER :: P(:) => NULL() -END TYPE FIELD_LOG2D_VIEW - -TYPE, ABSTRACT :: FIELD_LOG3D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_LOG3D_FINAL - PROCEDURE :: FIELD_LOG3D_FINAL - PROCEDURE :: GET_VIEW => FIELD_LOG3D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG3D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG3D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG3D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG3D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_LOG3D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_LOG3D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG3D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG3D_GET_HOST_DATA -END TYPE FIELD_LOG3D - -TYPE, EXTENDS(FIELD_LOG3D) :: FIELD_LOG3D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_LOG3D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_LOG3D_WRAPPER_FINAL -END TYPE FIELD_LOG3D_WRAPPER - -TYPE, EXTENDS(FIELD_LOG3D) :: FIELD_LOG3D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_LOG3D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_LOG3D_OWNER_FINAL -END TYPE FIELD_LOG3D_OWNER - -TYPE, EXTENDS(FIELD_LOG3D) :: FIELD_LOG3D_WRAPPER_PACKED - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_LOG3D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG3D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG3D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_LOG3D_WRAPPER_PACKED_FINAL -END TYPE FIELD_LOG3D_WRAPPER_PACKED - -TYPE FIELD_LOG3D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_LOG3D), POINTER :: PTR => NULL() -END TYPE FIELD_LOG3D_PTR - -TYPE FIELD_LOG3D_VIEW - ! Struct to hold array views, so we can make arrays of them - LOGICAL(KIND=JPLM), POINTER :: P(:,:) => NULL() -END TYPE FIELD_LOG3D_VIEW - -TYPE, ABSTRACT :: FIELD_LOG4D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_LOG4D_FINAL - PROCEDURE :: FIELD_LOG4D_FINAL - PROCEDURE :: GET_VIEW => FIELD_LOG4D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG4D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG4D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG4D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG4D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_LOG4D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_LOG4D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG4D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG4D_GET_HOST_DATA -END TYPE FIELD_LOG4D - -TYPE, EXTENDS(FIELD_LOG4D) :: FIELD_LOG4D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_LOG4D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_LOG4D_WRAPPER_FINAL -END TYPE FIELD_LOG4D_WRAPPER - -TYPE, EXTENDS(FIELD_LOG4D) :: FIELD_LOG4D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_LOG4D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_LOG4D_OWNER_FINAL -END TYPE FIELD_LOG4D_OWNER - -TYPE, EXTENDS(FIELD_LOG4D) :: FIELD_LOG4D_WRAPPER_PACKED - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_LOG4D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG4D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG4D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_LOG4D_WRAPPER_PACKED_FINAL -END TYPE FIELD_LOG4D_WRAPPER_PACKED - -TYPE FIELD_LOG4D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_LOG4D), POINTER :: PTR => NULL() -END TYPE FIELD_LOG4D_PTR - -TYPE FIELD_LOG4D_VIEW - ! Struct to hold array views, so we can make arrays of them - LOGICAL(KIND=JPLM), POINTER :: P(:,:,:) => NULL() -END TYPE FIELD_LOG4D_VIEW - -TYPE, ABSTRACT :: FIELD_LOG5D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:,:) => NULL() - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: DATA(:,:,:,:,:) => NULL() - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - LOGICAL(KIND=JPLM), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:,:,:) - - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - - INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 - - TYPE(GPU_STATS) :: STATS - -CONTAINS - - PROCEDURE :: FINAL => FIELD_LOG5D_FINAL - PROCEDURE :: FIELD_LOG5D_FINAL - PROCEDURE :: GET_VIEW => FIELD_LOG5D_GET_VIEW - PROCEDURE :: GET_HOST_DATA_RDONLY => FIELD_LOG5D_GET_HOST_DATA_RDONLY - PROCEDURE :: GET_HOST_DATA_RDWR => FIELD_LOG5D_GET_HOST_DATA_RDWR - PROCEDURE :: GET_DEVICE_DATA_RDONLY => FIELD_LOG5D_GET_DEVICE_DATA_RDONLY - PROCEDURE :: GET_DEVICE_DATA_RDWR => FIELD_LOG5D_GET_DEVICE_DATA_RDWR - PROCEDURE :: DELETE_DEVICE => FIELD_LOG5D_DELETE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_LOG5D_ENSURE_HOST - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST - - PROCEDURE, PRIVATE :: GET_DEVICE_DATA => FIELD_LOG5D_GET_DEVICE_DATA - PROCEDURE, PRIVATE :: GET_HOST_DATA => FIELD_LOG5D_GET_HOST_DATA -END TYPE FIELD_LOG5D - -TYPE, EXTENDS(FIELD_LOG5D) :: FIELD_LOG5D_WRAPPER -CONTAINS - PROCEDURE :: INIT => FIELD_LOG5D_WRAP - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST_WRAPPER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE_WRAPPER - PROCEDURE :: FINAL => FIELD_LOG5D_WRAPPER_FINAL -END TYPE FIELD_LOG5D_WRAPPER - -TYPE, EXTENDS(FIELD_LOG5D) :: FIELD_LOG5D_OWNER - TYPE(C_PTR) :: HPTR -CONTAINS - PROCEDURE :: INIT => FIELD_LOG5D_ALLOCATE - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST_OWNER - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE_OWNER - PROCEDURE :: FINAL => FIELD_LOG5D_OWNER_FINAL -END TYPE FIELD_LOG5D_OWNER - -TYPE, EXTENDS(FIELD_LOG5D) :: FIELD_LOG5D_WRAPPER_PACKED - LOGICAL(KIND=JPLM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX -CONTAINS - PROCEDURE :: INIT => FIELD_LOG5D_WRAP_PACKED - PROCEDURE, PRIVATE :: UPDATE_HOST => FIELD_LOG5D_UPDATE_HOST_WRAPPER_PACKED - PROCEDURE, PRIVATE :: UPDATE_DEVICE => FIELD_LOG5D_UPDATE_DEVICE_WRAPPER_PACKED - PROCEDURE :: FINAL => FIELD_LOG5D_WRAPPER_PACKED_FINAL -END TYPE FIELD_LOG5D_WRAPPER_PACKED - -TYPE FIELD_LOG5D_PTR - ! Struct to hold references to field objects - CLASS(FIELD_LOG5D), POINTER :: PTR => NULL() -END TYPE FIELD_LOG5D_PTR - -TYPE FIELD_LOG5D_VIEW - ! Struct to hold array views, so we can make arrays of them - LOGICAL(KIND=JPLM), POINTER :: P(:,:,:,:) => NULL() -END TYPE FIELD_LOG5D_VIEW - -CONTAINS -! -! CLASS METHODS -! - SUBROUTINE FIELD_2D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_2D_WRAPPER), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 2)) THEN - CALL ABOR1 ('FIELD_2D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_2D_WRAP - - SUBROUTINE FIELD_2D_UPDATE_DEVICE(SELF) - CLASS(FIELD_2D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_2D_UPDATE_DEVICE - - SUBROUTINE FIELD_2D_UPDATE_HOST(SELF) - CLASS(FIELD_2D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_2D_UPDATE_HOST - - SUBROUTINE FIELD_2D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_2D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA(:,IDX,:) - ELSE - SELF%PTR => DATA(:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_2D_WRAP_PACKED - - SUBROUTINE FIELD_2D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_2D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(2) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(2) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(2) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(2) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(2) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 2 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(2) = 1 - REAL_UBOUNDS(2) = UBOUNDS(2) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_2D_ALLOCATE - - FUNCTION FIELD_2D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_2D) :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):) => SELF%PTR(:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPRB - END IF - END FUNCTION FIELD_2D_GET_VIEW - - SUBROUTINE FIELD_2D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_2D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_2D_DELETE_DEVICE - - SUBROUTINE FIELD_2D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_2D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_2D_FINAL - - SUBROUTINE FIELD_2D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_2D_WRAPPER) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_2D_FINAL - END SUBROUTINE FIELD_2D_WRAPPER_FINAL - - SUBROUTINE FIELD_2D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_2D_WRAPPER_PACKED) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_2D_FINAL - END SUBROUTINE FIELD_2D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_2D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_2D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_2D_FINAL - END SUBROUTINE FIELD_2D_OWNER_FINAL - - SUBROUTINE FIELD_2D_ENSURE_HOST(SELF) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_2D_ENSURE_HOST - - SUBROUTINE FIELD_2D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):) => SELF%PTR (:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_2D_GET_HOST_DATA - - SUBROUTINE FIELD_2D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_2D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_2D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_2D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_2D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_2D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_2D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):) => SELF%DEVPTR(:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_2D_GET_DEVICE_DATA - - SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_2D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_2D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_2D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_2D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_2D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%PTR(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_2D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%BASE_PTR(:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_2D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:, SELF%FIDX,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_2D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_2D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_2D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_2D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_2D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_2D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_2D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_3D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_3D_WRAPPER), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 3)) THEN - CALL ABOR1 ('FIELD_3D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_3D_WRAP - - SUBROUTINE FIELD_3D_UPDATE_DEVICE(SELF) - CLASS(FIELD_3D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_3D_UPDATE_DEVICE - - SUBROUTINE FIELD_3D_UPDATE_HOST(SELF) - CLASS(FIELD_3D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_3D_UPDATE_HOST - - SUBROUTINE FIELD_3D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_3D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA(:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_3D_WRAP_PACKED - - SUBROUTINE FIELD_3D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_3D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(3) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(3) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(3) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(3) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(3) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 3 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(3) = 1 - REAL_UBOUNDS(3) = UBOUNDS(3) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_3D_ALLOCATE - - FUNCTION FIELD_3D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_3D) :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):) => SELF%PTR(:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_3D_GET_VIEW - - SUBROUTINE FIELD_3D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_3D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_3D_DELETE_DEVICE - - SUBROUTINE FIELD_3D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_3D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_3D_FINAL - - SUBROUTINE FIELD_3D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_3D_WRAPPER) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_3D_FINAL - END SUBROUTINE FIELD_3D_WRAPPER_FINAL - - SUBROUTINE FIELD_3D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_3D_WRAPPER_PACKED) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_3D_FINAL - END SUBROUTINE FIELD_3D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_3D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_3D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_3D_FINAL - END SUBROUTINE FIELD_3D_OWNER_FINAL - - SUBROUTINE FIELD_3D_ENSURE_HOST(SELF) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_3D_ENSURE_HOST - - SUBROUTINE FIELD_3D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%PTR (:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_3D_GET_HOST_DATA - - SUBROUTINE FIELD_3D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_3D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_3D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_3D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_3D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_3D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_3D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%DEVPTR(:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_3D_GET_DEVICE_DATA - - SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_3D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_3D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_3D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_3D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_3D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%PTR(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_3D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%BASE_PTR(:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_3D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_3D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_3D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_3D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_3D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_3D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_3D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_3D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_4D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_4D_WRAPPER), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 4)) THEN - CALL ABOR1 ('FIELD_4D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_4D_WRAP - - SUBROUTINE FIELD_4D_UPDATE_DEVICE(SELF) - CLASS(FIELD_4D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_4D_UPDATE_DEVICE - - SUBROUTINE FIELD_4D_UPDATE_HOST(SELF) - CLASS(FIELD_4D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_4D_UPDATE_HOST - - SUBROUTINE FIELD_4D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_4D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA(:,:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_4D_WRAP_PACKED - - SUBROUTINE FIELD_4D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_4D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(4) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(4) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(4) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(4) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(4) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 4 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(4) = 1 - REAL_UBOUNDS(4) = UBOUNDS(4) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_4D_ALLOCATE - - FUNCTION FIELD_4D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_4D) :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):) => SELF%PTR(:,:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_4D_GET_VIEW - - SUBROUTINE FIELD_4D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_4D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_4D_DELETE_DEVICE - - SUBROUTINE FIELD_4D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_4D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_4D_FINAL - - SUBROUTINE FIELD_4D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_4D_WRAPPER) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_4D_FINAL - END SUBROUTINE FIELD_4D_WRAPPER_FINAL - - SUBROUTINE FIELD_4D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_4D_WRAPPER_PACKED) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_4D_FINAL - END SUBROUTINE FIELD_4D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_4D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_4D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_4D_FINAL - END SUBROUTINE FIELD_4D_OWNER_FINAL - - SUBROUTINE FIELD_4D_ENSURE_HOST(SELF) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_4D_ENSURE_HOST - - SUBROUTINE FIELD_4D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%PTR (:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_4D_GET_HOST_DATA - - SUBROUTINE FIELD_4D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_4D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_4D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_4D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_4D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_4D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_4D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%DEVPTR(:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_4D_GET_DEVICE_DATA - - SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_4D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_4D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_4D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_4D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_4D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%PTR(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_4D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_4D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_4D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_4D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_4D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_4D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_4D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_4D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_4D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_5D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_5D_WRAPPER), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 5)) THEN - CALL ABOR1 ('FIELD_5D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_5D_WRAP - - SUBROUTINE FIELD_5D_UPDATE_DEVICE(SELF) - CLASS(FIELD_5D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_5D_UPDATE_DEVICE - - SUBROUTINE FIELD_5D_UPDATE_HOST(SELF) - CLASS(FIELD_5D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_5D_UPDATE_HOST - - SUBROUTINE FIELD_5D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_5D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA(:,:,:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_5D_WRAP_PACKED - - SUBROUTINE FIELD_5D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_5D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(5) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(5) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(5) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(5) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(5) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 5 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(5) = 1 - REAL_UBOUNDS(5) = UBOUNDS(5) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * (REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1) * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1,REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):,REAL_LBOUNDS(5):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_5D_ALLOCATE - - FUNCTION FIELD_5D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_5D) :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):,LBOUNDS(4):) => SELF%PTR(:,:,:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_5D_GET_VIEW - - SUBROUTINE FIELD_5D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_5D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_5D_DELETE_DEVICE - - SUBROUTINE FIELD_5D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_5D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_5D_FINAL - - SUBROUTINE FIELD_5D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_5D_WRAPPER) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_5D_FINAL - END SUBROUTINE FIELD_5D_WRAPPER_FINAL - - SUBROUTINE FIELD_5D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_5D_WRAPPER_PACKED) :: SELF - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_5D_FINAL - END SUBROUTINE FIELD_5D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_5D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_5D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_5D_FINAL - END SUBROUTINE FIELD_5D_OWNER_FINAL - - SUBROUTINE FIELD_5D_ENSURE_HOST(SELF) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_5D_ENSURE_HOST - - SUBROUTINE FIELD_5D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%PTR (:,:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_5D_GET_HOST_DATA - - SUBROUTINE FIELD_5D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_5D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_5D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_5D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_5D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_5D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_5D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%DEVPTR(:,:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_5D_GET_DEVICE_DATA - - SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_5D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_5D), INTENT (INOUT) :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_5D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_5D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_5D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%PTR(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_5D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,:,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_5D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_5D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_5D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_5D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_5D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_5D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_5D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1.0_JPRB) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_5D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_INT2D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT2D_WRAPPER), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 2)) THEN - CALL ABOR1 ('FIELD_INT2D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_INT2D_WRAP - - SUBROUTINE FIELD_INT2D_UPDATE_DEVICE(SELF) - CLASS(FIELD_INT2D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE - - SUBROUTINE FIELD_INT2D_UPDATE_HOST(SELF) - CLASS(FIELD_INT2D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT2D_UPDATE_HOST - - SUBROUTINE FIELD_INT2D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT2D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA(:,IDX,:) - ELSE - SELF%PTR => DATA(:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_INT2D_WRAP_PACKED - - SUBROUTINE FIELD_INT2D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_INT2D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(2) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(2) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(2) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(2) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(2) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 2 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(2) = 1 - REAL_UBOUNDS(2) = UBOUNDS(2) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * SIZEOF(1_JPIM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_INT2D_ALLOCATE - - FUNCTION FIELD_INT2D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_INT2D) :: SELF - INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):) => SELF%PTR(:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPIM - END IF - END FUNCTION FIELD_INT2D_GET_VIEW - - SUBROUTINE FIELD_INT2D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_INT2D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_INT2D_DELETE_DEVICE - - SUBROUTINE FIELD_INT2D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT2D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_INT2D_FINAL - - SUBROUTINE FIELD_INT2D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT2D_WRAPPER) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT2D_FINAL - END SUBROUTINE FIELD_INT2D_WRAPPER_FINAL - - SUBROUTINE FIELD_INT2D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT2D_WRAPPER_PACKED) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT2D_FINAL - END SUBROUTINE FIELD_INT2D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_INT2D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT2D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_INT2D_FINAL - END SUBROUTINE FIELD_INT2D_OWNER_FINAL - - SUBROUTINE FIELD_INT2D_ENSURE_HOST(SELF) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_INT2D_ENSURE_HOST - - SUBROUTINE FIELD_INT2D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):) => SELF%PTR (:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT2D_GET_HOST_DATA - - SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_INT2D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_INT2D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT2D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):) => SELF%DEVPTR(:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA - - SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_INT2D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT2D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_INT2D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT2D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_INT2D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%PTR(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_INT2D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%BASE_PTR(:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:, SELF%FIDX,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT2D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_INT2D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_INT2D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_INT2D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT2D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_INT3D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT3D_WRAPPER), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 3)) THEN - CALL ABOR1 ('FIELD_INT3D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_INT3D_WRAP - - SUBROUTINE FIELD_INT3D_UPDATE_DEVICE(SELF) - CLASS(FIELD_INT3D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE - - SUBROUTINE FIELD_INT3D_UPDATE_HOST(SELF) - CLASS(FIELD_INT3D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT3D_UPDATE_HOST - - SUBROUTINE FIELD_INT3D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT3D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA(:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_INT3D_WRAP_PACKED - - SUBROUTINE FIELD_INT3D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_INT3D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(3) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(3) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(3) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(3) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(3) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 3 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(3) = 1 - REAL_UBOUNDS(3) = UBOUNDS(3) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & SIZEOF(1_JPIM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_INT3D_ALLOCATE - - FUNCTION FIELD_INT3D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_INT3D) :: SELF - INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):) => SELF%PTR(:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = 0.0_JPIM - END IF - END FUNCTION FIELD_INT3D_GET_VIEW - - SUBROUTINE FIELD_INT3D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_INT3D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_INT3D_DELETE_DEVICE - - SUBROUTINE FIELD_INT3D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT3D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_INT3D_FINAL - - SUBROUTINE FIELD_INT3D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT3D_WRAPPER) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT3D_FINAL - END SUBROUTINE FIELD_INT3D_WRAPPER_FINAL - - SUBROUTINE FIELD_INT3D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT3D_WRAPPER_PACKED) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT3D_FINAL - END SUBROUTINE FIELD_INT3D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_INT3D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT3D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_INT3D_FINAL - END SUBROUTINE FIELD_INT3D_OWNER_FINAL - - SUBROUTINE FIELD_INT3D_ENSURE_HOST(SELF) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_INT3D_ENSURE_HOST - - SUBROUTINE FIELD_INT3D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%PTR (:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT3D_GET_HOST_DATA - - SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_INT3D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_INT3D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT3D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%DEVPTR(:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA - - SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_INT3D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT3D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_INT3D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT3D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_INT3D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%PTR(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_INT3D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%BASE_PTR(:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT3D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_INT3D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT3D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_INT3D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_INT3D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT3D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_INT4D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT4D_WRAPPER), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 4)) THEN - CALL ABOR1 ('FIELD_INT4D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_INT4D_WRAP - - SUBROUTINE FIELD_INT4D_UPDATE_DEVICE(SELF) - CLASS(FIELD_INT4D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE - - SUBROUTINE FIELD_INT4D_UPDATE_HOST(SELF) - CLASS(FIELD_INT4D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT4D_UPDATE_HOST - - SUBROUTINE FIELD_INT4D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT4D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA(:,:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_INT4D_WRAP_PACKED - - SUBROUTINE FIELD_INT4D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_INT4D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(4) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(4) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(4) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(4) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(4) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 4 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(4) = 1 - REAL_UBOUNDS(4) = UBOUNDS(4) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * SIZEOF(1_JPIM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_INT4D_ALLOCATE - - FUNCTION FIELD_INT4D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_INT4D) :: SELF - INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):) => SELF%PTR(:,:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPIM - END IF - END FUNCTION FIELD_INT4D_GET_VIEW - - SUBROUTINE FIELD_INT4D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_INT4D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_INT4D_DELETE_DEVICE - - SUBROUTINE FIELD_INT4D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT4D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_INT4D_FINAL - - SUBROUTINE FIELD_INT4D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT4D_WRAPPER) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT4D_FINAL - END SUBROUTINE FIELD_INT4D_WRAPPER_FINAL - - SUBROUTINE FIELD_INT4D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT4D_WRAPPER_PACKED) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT4D_FINAL - END SUBROUTINE FIELD_INT4D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_INT4D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT4D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_INT4D_FINAL - END SUBROUTINE FIELD_INT4D_OWNER_FINAL - - SUBROUTINE FIELD_INT4D_ENSURE_HOST(SELF) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_INT4D_ENSURE_HOST - - SUBROUTINE FIELD_INT4D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%PTR (:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT4D_GET_HOST_DATA - - SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_INT4D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_INT4D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT4D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%DEVPTR(:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA - - SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_INT4D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT4D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_INT4D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT4D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_INT4D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%PTR(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_INT4D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT4D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_INT4D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT4D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_INT4D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_INT4D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT4D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_INT5D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT5D_WRAPPER), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 5)) THEN - CALL ABOR1 ('FIELD_INT5D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_INT5D_WRAP - - SUBROUTINE FIELD_INT5D_UPDATE_DEVICE(SELF) - CLASS(FIELD_INT5D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE - - SUBROUTINE FIELD_INT5D_UPDATE_HOST(SELF) - CLASS(FIELD_INT5D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_INT5D_UPDATE_HOST - - SUBROUTINE FIELD_INT5D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_INT5D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA(:,:,:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_INT5D_WRAP_PACKED - - SUBROUTINE FIELD_INT5D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_INT5D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(5) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(5) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(5) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(5) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(5) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 5 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(5) = 1 - REAL_UBOUNDS(5) = UBOUNDS(5) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * (REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1) * SIZEOF(1_JPIM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1,REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):,REAL_LBOUNDS(5):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_INT5D_ALLOCATE - - FUNCTION FIELD_INT5D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_INT5D) :: SELF - INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):,LBOUNDS(4):) => SELF%PTR(:,:,:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:,:) = 0.0_JPIM - END IF - END FUNCTION FIELD_INT5D_GET_VIEW - - SUBROUTINE FIELD_INT5D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_INT5D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_INT5D_DELETE_DEVICE - - SUBROUTINE FIELD_INT5D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT5D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_INT5D_FINAL - - SUBROUTINE FIELD_INT5D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT5D_WRAPPER) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT5D_FINAL - END SUBROUTINE FIELD_INT5D_WRAPPER_FINAL - - SUBROUTINE FIELD_INT5D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT5D_WRAPPER_PACKED) :: SELF - INTEGER(KIND=JPIM), POINTER :: PTR(:,:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_INT5D_FINAL - END SUBROUTINE FIELD_INT5D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_INT5D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_INT5D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_INT5D_FINAL - END SUBROUTINE FIELD_INT5D_OWNER_FINAL - - SUBROUTINE FIELD_INT5D_ENSURE_HOST(SELF) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_INT5D_ENSURE_HOST - - SUBROUTINE FIELD_INT5D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%PTR (:,:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT5D_GET_HOST_DATA - - SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_INT5D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_INT5D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT5D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%DEVPTR(:,:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA - - SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_INT5D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_INT5D), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_INT5D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_INT5D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_INT5D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%PTR(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_INT5D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,:,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_INT5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT5D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_INT5D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT5D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_INT5D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_INT5D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(1_JPIM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_INT5D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_LOG2D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG2D_WRAPPER), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 2)) THEN - CALL ABOR1 ('FIELD_LOG2D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_LOG2D_WRAP - - SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE(SELF) - CLASS(FIELD_LOG2D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE - - SUBROUTINE FIELD_LOG2D_UPDATE_HOST(SELF) - CLASS(FIELD_LOG2D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG2D_UPDATE_HOST - - SUBROUTINE FIELD_LOG2D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG2D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(2) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):) => DATA(:,IDX,:) - ELSE - SELF%PTR => DATA(:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_LOG2D_WRAP_PACKED - - SUBROUTINE FIELD_LOG2D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_LOG2D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(2) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(2) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(2) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(2) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(2) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 2 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(2) = 1 - REAL_UBOUNDS(2) = UBOUNDS(2) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * SIZEOF(.TRUE._JPLM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_LOG2D_ALLOCATE - - FUNCTION FIELD_LOG2D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_LOG2D) :: SELF - LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):) => SELF%PTR(:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = .FALSE. - END IF - END FUNCTION FIELD_LOG2D_GET_VIEW - - SUBROUTINE FIELD_LOG2D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_LOG2D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_LOG2D_DELETE_DEVICE - - SUBROUTINE FIELD_LOG2D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG2D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_LOG2D_FINAL - - SUBROUTINE FIELD_LOG2D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG2D_WRAPPER) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG2D_FINAL - END SUBROUTINE FIELD_LOG2D_WRAPPER_FINAL - - SUBROUTINE FIELD_LOG2D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG2D_WRAPPER_PACKED) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG2D_FINAL - END SUBROUTINE FIELD_LOG2D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_LOG2D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG2D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_LOG2D_FINAL - END SUBROUTINE FIELD_LOG2D_OWNER_FINAL - - SUBROUTINE FIELD_LOG2D_ENSURE_HOST(SELF) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_LOG2D_ENSURE_HOST - - SUBROUTINE FIELD_LOG2D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):) => SELF%PTR (:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG2D_GET_HOST_DATA - - SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_LOG2D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_LOG2D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG2D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(2) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):) => SELF%DEVPTR(:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA - - SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_LOG2D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG2D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:) - - CALL FIELD_LOG2D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG2D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_LOG2D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%PTR(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_LOG2D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,IBL), SELF%BASE_PTR(:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG2D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 2) - LBOUND(SELF%DATA, 2) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 2), UBOUND(SELF%PTR, 2) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:, SELF%FIDX,IBL), SELF%DEVDATA(:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG2D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_LOG2D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_LOG2D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_LOG2D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG2D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_LOG3D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG3D_WRAPPER), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 3)) THEN - CALL ABOR1 ('FIELD_LOG3D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_LOG3D_WRAP - - SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE(SELF) - CLASS(FIELD_LOG3D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE - - SUBROUTINE FIELD_LOG3D_UPDATE_HOST(SELF) - CLASS(FIELD_LOG3D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG3D_UPDATE_HOST - - SUBROUTINE FIELD_LOG3D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG3D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(3) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => DATA(:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_LOG3D_WRAP_PACKED - - SUBROUTINE FIELD_LOG3D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_LOG3D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(3) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(3) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(3) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(3) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(3) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 3 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(3) = 1 - REAL_UBOUNDS(3) = UBOUNDS(3) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & SIZEOF(.TRUE._JPLM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_LOG3D_ALLOCATE - - FUNCTION FIELD_LOG3D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_LOG3D) :: SELF - LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):) => SELF%PTR(:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = .FALSE. - END IF - END FUNCTION FIELD_LOG3D_GET_VIEW - - SUBROUTINE FIELD_LOG3D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_LOG3D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_LOG3D_DELETE_DEVICE - - SUBROUTINE FIELD_LOG3D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG3D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_LOG3D_FINAL - - SUBROUTINE FIELD_LOG3D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG3D_WRAPPER) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG3D_FINAL - END SUBROUTINE FIELD_LOG3D_WRAPPER_FINAL - - SUBROUTINE FIELD_LOG3D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG3D_WRAPPER_PACKED) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG3D_FINAL - END SUBROUTINE FIELD_LOG3D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_LOG3D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG3D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_LOG3D_FINAL - END SUBROUTINE FIELD_LOG3D_OWNER_FINAL - - SUBROUTINE FIELD_LOG3D_ENSURE_HOST(SELF) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_LOG3D_ENSURE_HOST - - SUBROUTINE FIELD_LOG3D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%PTR (:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG3D_GET_HOST_DATA - - SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_LOG3D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_LOG3D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG3D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(3) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):) => SELF%DEVPTR(:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA - - SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_LOG3D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG3D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:) - - CALL FIELD_LOG3D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG3D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_LOG3D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%PTR(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_LOG3D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,IBL), SELF%BASE_PTR(:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG3D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 3) - LBOUND(SELF%DATA, 3) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 3), UBOUND(SELF%PTR, 3) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG3D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_LOG3D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG3D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_LOG3D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_LOG3D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG3D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_LOG4D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG4D_WRAPPER), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 4)) THEN - CALL ABOR1 ('FIELD_LOG4D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_LOG4D_WRAP - - SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE(SELF) - CLASS(FIELD_LOG4D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE - - SUBROUTINE FIELD_LOG4D_UPDATE_HOST(SELF) - CLASS(FIELD_LOG4D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG4D_UPDATE_HOST - - SUBROUTINE FIELD_LOG4D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG4D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(4) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => DATA(:,:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_LOG4D_WRAP_PACKED - - SUBROUTINE FIELD_LOG4D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_LOG4D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(4) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(4) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(4) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(4) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(4) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 4 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(4) = 1 - REAL_UBOUNDS(4) = UBOUNDS(4) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * SIZEOF(.TRUE._JPLM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_LOG4D_ALLOCATE - - FUNCTION FIELD_LOG4D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_LOG4D) :: SELF - LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):) => SELF%PTR(:,:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = .FALSE. - END IF - END FUNCTION FIELD_LOG4D_GET_VIEW - - SUBROUTINE FIELD_LOG4D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_LOG4D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_LOG4D_DELETE_DEVICE - - SUBROUTINE FIELD_LOG4D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG4D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_LOG4D_FINAL - - SUBROUTINE FIELD_LOG4D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG4D_WRAPPER) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG4D_FINAL - END SUBROUTINE FIELD_LOG4D_WRAPPER_FINAL - - SUBROUTINE FIELD_LOG4D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG4D_WRAPPER_PACKED) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG4D_FINAL - END SUBROUTINE FIELD_LOG4D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_LOG4D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG4D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_LOG4D_FINAL - END SUBROUTINE FIELD_LOG4D_OWNER_FINAL - - SUBROUTINE FIELD_LOG4D_ENSURE_HOST(SELF) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_LOG4D_ENSURE_HOST - - SUBROUTINE FIELD_LOG4D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%PTR (:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG4D_GET_HOST_DATA - - SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_LOG4D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_LOG4D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG4D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(4) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):) => SELF%DEVPTR(:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA - - SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_LOG4D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG4D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:) - - CALL FIELD_LOG4D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG4D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_LOG4D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%PTR(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_LOG4D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,IBL), SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG4D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 4) - LBOUND(SELF%DATA, 4) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 4), UBOUND(SELF%PTR, 4) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG4D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_LOG4D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG4D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_LOG4D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_LOG4D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG4D_UPDATE_HOST_OWNER - - SUBROUTINE FIELD_LOG5D_WRAP(SELF, DATA, PERSISTENT, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG5D_WRAPPER), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) - LOGICAL :: LLPERSISTENT - - LLPERSISTENT = .TRUE. - IF (PRESENT (PERSISTENT)) LLPERSISTENT = PERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA - ELSE - SELF%PTR => DATA - ENDIF - SELF%THREAD_BUFFER = .NOT. LLPERSISTENT - SELF%ISTATUS = NHSTFRESH - - IF (.NOT. LLPERSISTENT) THEN - IF (OML_MAX_THREADS() /= SIZE (DATA, 5)) THEN - CALL ABOR1 ('FIELD_LOG5D_WRAP: DIMENSION MISMATCH') - ENDIF - ENDIF - - END SUBROUTINE FIELD_LOG5D_WRAP - - SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE(SELF) - CLASS(FIELD_LOG5D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE - - SUBROUTINE FIELD_LOG5D_UPDATE_HOST(SELF) - CLASS(FIELD_LOG5D), INTENT(INOUT) :: SELF - - PRINT *, "Should never arrive here" - ERROR STOP - - END SUBROUTINE FIELD_LOG5D_UPDATE_HOST - - SUBROUTINE FIELD_LOG5D_WRAP_PACKED(SELF, DATA, IDX, LBOUNDS) - ! Create FIELD object by wrapping existing data - CLASS(FIELD_LOG5D_WRAPPER_PACKED), INTENT(INOUT) :: SELF - LOGICAL(KIND=JPLM), TARGET, INTENT(IN) :: DATA(:,:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: LBOUNDS(5) - LOGICAL :: LLPERSISTENT - - IF (PRESENT(LBOUNDS)) THEN - SELF%PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => DATA(:,:,:,:,IDX,:) - ELSE - SELF%PTR => DATA(:,:,:,:,IDX,:) - ENDIF - SELF%THREAD_BUFFER = .FALSE. - SELF%ISTATUS = NHSTFRESH - - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - END SUBROUTINE FIELD_LOG5D_WRAP_PACKED - - SUBROUTINE FIELD_LOG5D_ALLOCATE (SELF, LBOUNDS, UBOUNDS, PERSISTENT) - ! Create FIELD object by explicitly allocating new data - CLASS(FIELD_LOG5D_OWNER) :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: LBOUNDS(5) - INTEGER(KIND=JPIM), INTENT(IN) :: UBOUNDS(5) - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: REAL_LBOUNDS(5) - INTEGER(KIND=JPIM) :: REAL_UBOUNDS(5) - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - - REAL_LBOUNDS=LBOUNDS - REAL_UBOUNDS=UBOUNDS - REAL_UBOUNDS(5) = OML_MAX_THREADS() - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - - SELF%LAST_CONTIGUOUS_DIMENSION = 5 - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - SELF%THREAD_BUFFER = .FALSE. - REAL_LBOUNDS(5) = 1 - REAL_UBOUNDS(5) = UBOUNDS(5) - END IF - END IF - - ARRSIZE = (REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1) * (REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1) * (REAL_UBOUNDS(3)-REAL_LBOUNDS(3)+1) *& - & (REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1) * (REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1) * SIZEOF(.TRUE._JPLM) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(SELF%HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(SELF%HPTR, SELF%DATA, [REAL_UBOUNDS(1)-REAL_LBOUNDS(1)+1,REAL_UBOUNDS(2)-REAL_LBOUNDS(2)+1,REAL_UBOUNDS(3)-REA& - &L_LBOUNDS(3)+1,REAL_UBOUNDS(4)-REAL_LBOUNDS(4)+1,REAL_UBOUNDS(5)-REAL_LBOUNDS(5)+1]) - SELF%PTR(REAL_LBOUNDS(1):,REAL_LBOUNDS(2):,REAL_LBOUNDS(3):,REAL_LBOUNDS(4):,REAL_LBOUNDS(5):) => SELF%DATA - - SELF%ISTATUS = NHSTFRESH - END SUBROUTINE FIELD_LOG5D_ALLOCATE - - FUNCTION FIELD_LOG5D_GET_VIEW(SELF, BLOCK_INDEX, ZERO) RESULT(VIEW_PTR) - CLASS(FIELD_LOG5D) :: SELF - LOGICAL(KIND=JPLM), POINTER :: VIEW_PTR(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - - LBOUNDS=LBOUND(SELF%PTR) - VIEW_PTR(LBOUNDS(1):,LBOUNDS(2):,LBOUNDS(3):,LBOUNDS(4):) => SELF%PTR(:,:,:,:,IDX) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:,:) = .FALSE. - END IF - END FUNCTION FIELD_LOG5D_GET_VIEW - - SUBROUTINE FIELD_LOG5D_DELETE_DEVICE(SELF) - ! Delete the copy of this field on GPU device - CLASS(FIELD_LOG5D) :: SELF - - IF (ASSOCIATED (SELF%DEVPTR)) THEN - DEALLOCATE (SELF%DEVDATA) - NULLIFY(SELF%DEVPTR) - ENDIF - END SUBROUTINE FIELD_LOG5D_DELETE_DEVICE - - SUBROUTINE FIELD_LOG5D_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG5D) :: SELF - CALL SELF%DELETE_DEVICE() - NULLIFY(SELF%PTR) - END SUBROUTINE FIELD_LOG5D_FINAL - - SUBROUTINE FIELD_LOG5D_WRAPPER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG5D_WRAPPER) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG5D_FINAL - END SUBROUTINE FIELD_LOG5D_WRAPPER_FINAL - - SUBROUTINE FIELD_LOG5D_WRAPPER_PACKED_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG5D_WRAPPER_PACKED) :: SELF - LOGICAL(KIND=JPLM), POINTER :: PTR(:,:,:,:,:) - CALL SELF%GET_HOST_DATA_RDONLY(PTR) - CALL SELF%FIELD_LOG5D_FINAL - END SUBROUTINE FIELD_LOG5D_WRAPPER_PACKED_FINAL - - SUBROUTINE FIELD_LOG5D_OWNER_FINAL(SELF) - ! Finalizes field and deallocates owned data - CLASS(FIELD_LOG5D_OWNER) :: SELF - INTEGER(KIND=JPIM) :: ISTAT - - ISTAT = CUDAFREEHOST(SELF%HPTR) - NULLIFY(SELF%DATA) - CALL SELF%FIELD_LOG5D_FINAL - END SUBROUTINE FIELD_LOG5D_OWNER_FINAL - - SUBROUTINE FIELD_LOG5D_ENSURE_HOST(SELF) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - - END SUBROUTINE FIELD_LOG5D_ENSURE_HOST - - SUBROUTINE FIELD_LOG5D_GET_HOST_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NHSTFRESH) == 0) THEN - CALL SELF%UPDATE_HOST() - SELF%ISTATUS = IOR (SELF%ISTATUS, NHSTFRESH) - ENDIF - PTR (LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%PTR (:,:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NDEVFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG5D_GET_HOST_DATA - - SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_LOG5D_GET_HOST_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDONLY - - SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_LOG5D_GET_HOST_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG5D_GET_HOST_DATA_RDWR - - SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA (SELF, MODE, PTR) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - INTEGER (KIND=JPIM), INTENT (IN) :: MODE - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - INTEGER(KIND=JPIM) :: LBOUNDS(5) - - LBOUNDS=LBOUND(SELF%PTR) - IF (IAND (SELF%ISTATUS, NDEVFRESH) == 0) THEN - IF (.NOT. ALLOCATED(SELF%DEVDATA))THEN - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - ENDIF - CALL SELF%UPDATE_DEVICE() - SELF%ISTATUS = IOR (SELF%ISTATUS, NDEVFRESH) - ENDIF - PTR(LBOUNDS(1):, LBOUNDS(2):, LBOUNDS(3):, LBOUNDS(4):, LBOUNDS(5):) => SELF%DEVPTR(:,:,:,:,:) - IF (IAND (MODE, NWR) /= 0) THEN - SELF%ISTATUS = IAND (SELF%ISTATUS, NOT (NHSTFRESH)) - ENDIF - - END SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA - - SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDONLY (SELF, PTR) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_LOG5D_GET_DEVICE_DATA (SELF, NRD, PTR) - - END SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDONLY - - SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDWR (SELF, PTR) - CLASS(FIELD_LOG5D), INTENT (INOUT) :: SELF - LOGICAL(KIND=JPLM), POINTER, INTENT(INOUT) :: PTR(:,:,:,:,:) - - CALL FIELD_LOG5D_GET_DEVICE_DATA (SELF, IOR (NRD, NWR), PTR) - - END SUBROUTINE FIELD_LOG5D_GET_DEVICE_DATA_RDWR - - - SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER(SELF) - CLASS(FIELD_LOG5D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%PTR(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER - - SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER(SELF) - CLASS(FIELD_LOG5D_WRAPPER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%PTR(:,:,:,:,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER - - SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA(:,:,:,:,IBL), SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER_PACKED(SELF) - CLASS(FIELD_LOG5D_WRAPPER_PACKED), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE, BLKSIZE, NBLOCKS, IBL - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - NBLOCKS = UBOUND(SELF%DATA, 5) - LBOUND(SELF%DATA, 5) + 1 - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - BLKSIZE = ARRSIZE/NBLOCKS - - CALL CPU_TIME(START) - DO IBL=LBOUND(SELF%PTR, 5), UBOUND(SELF%PTR, 5) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%BASE_PTR(:,:,:,:, SELF%FIDX,IBL), SELF%DEVDATA(:,:,:,:,IBL), BLKSIZE) - END DO - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - DEALLOCATE(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG5D_UPDATE_HOST_WRAPPER_PACKED - - SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_OWNER(SELF) - CLASS(FIELD_LOG5D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL ACC_MAP_DATA(SELF%DATA, SELF%DEVDATA, ARRSIZE) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_TO_DEVICE(SELF%DEVDATA, SELF%DATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - HPTR = ACC_HOSTPTR(SELF%DEVDATA) - CALL C_F_POINTER(HPTR, SELF%DEVPTR, SHAPE(SELF%DEVDATA)) - - CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG5D_UPDATE_DEVICE_OWNER - - SUBROUTINE FIELD_LOG5D_UPDATE_HOST_OWNER(SELF) - CLASS(FIELD_LOG5D_OWNER), INTENT (INOUT) :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - TYPE(C_PTR) :: HPTR - REAL :: START, FINISH - - ARRSIZE = SIZE(SELF%DATA) * SIZEOF(.TRUE._JPLM) - - CALL CPU_TIME(START) - CALL ACC_MEMCPY_FROM_DEVICE(SELF%DATA, SELF%DEVDATA, ARRSIZE) - CALL CPU_TIME(FINISH) - - CALL ACC_UNMAP_DATA(SELF%DATA) - - CALL SELF%STATS%INC_GPU_TO_CPU_TRANSFER(START, FINISH) - - END SUBROUTINE FIELD_LOG5D_UPDATE_HOST_OWNER - - - FUNCTION MALLOC_HOST_PINNED_2D(SHAPE, NBLOCKS) RESULT(PTR) - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:) - - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - TYPE(C_PTR) :: HPTR - - ARRSIZE = SHAPE(1) * NBLOCKS * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), NBLOCKS] ) - - END FUNCTION - - FUNCTION MALLOC_HOST_PINNED_3D(SHAPE, NBLOCKS) RESULT(PTR) - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:,:) - - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - TYPE(C_PTR) :: HPTR - - ARRSIZE = SHAPE(1) * SHAPE(2) * NBLOCKS * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), SHAPE(2), NBLOCKS] ) - - END FUNCTION - - FUNCTION MALLOC_HOST_PINNED_4D(SHAPE, NBLOCKS) RESULT(PTR) - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:,:,:) - - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - TYPE(C_PTR) :: HPTR - - ARRSIZE = SHAPE(1) * SHAPE(2) * SHAPE(3) * NBLOCKS * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), SHAPE(2), SHAPE(3), NBLOCKS] ) - - END FUNCTION - - FUNCTION MALLOC_HOST_PINNED_5D(SHAPE, NBLOCKS) RESULT(PTR) - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(4) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PTR(:,:,:,:,:) - - INTEGER(KIND=JPIM) :: ISTAT, ARRSIZE - TYPE(C_PTR) :: HPTR - - ARRSIZE = SHAPE(1) * SHAPE(2) * SHAPE(3) * SHAPE(4) * NBLOCKS * SIZEOF(1.0_JPRB) - ISTAT = CUDASETDEVICEFLAGS(CUDADEVICEMAPHOST) - ISTAT = CUDAHOSTALLOC(HPTR, ARRSIZE, CUDAHOSTALLOCMAPPED) - CALL C_F_POINTER(HPTR, PTR, [SHAPE(1), SHAPE(2), SHAPE(3), SHAPE(4), NBLOCKS] ) - - END FUNCTION - - - SUBROUTINE INC_CPU_TO_GPU_TRANSFER(SELF, START, FINISH) - CLASS(GPU_STATS), INTENT(INOUT) :: SELF - REAL, INTENT(IN) :: START, FINISH - SELF%TRANSFER_CPU_TO_GPU = SELF%TRANSFER_CPU_TO_GPU + 1 - SELF%TOTAL_TIME_TRANSFER_CPU_TO_GPU = SELF%TOTAL_TIME_TRANSFER_CPU_TO_GPU + FINISH - START - END SUBROUTINE - - SUBROUTINE INC_GPU_TO_CPU_TRANSFER(SELF, START, FINISH) - CLASS(GPU_STATS), INTENT(INOUT) :: SELF - REAL, INTENT(IN) :: START, FINISH - SELF%TRANSFER_GPU_TO_CPU = SELF%TRANSFER_GPU_TO_CPU + 1 - SELF%TOTAL_TIME_TRANSFER_GPU_TO_CPU = SELF%TOTAL_TIME_TRANSFER_GPU_TO_CPU + FINISH - START - END SUBROUTINE - - -! -! HELPERS -! - - INTEGER (KIND=JPIM) FUNCTION FIELD_2D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - REAL(KIND=JPRB), POINTER :: PTR (:,:) - INTEGER*8 :: ISTRIDE (2) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 2 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1)) - LOC (PTR (1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2)) - LOC (PTR (1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - END FUNCTION FIELD_2D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_3D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - REAL(KIND=JPRB), POINTER :: PTR (:,:,:) - INTEGER*8 :: ISTRIDE (3) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 3 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - END FUNCTION FIELD_3D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_4D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - REAL(KIND=JPRB), POINTER :: PTR (:,:,:,:) - INTEGER*8 :: ISTRIDE (4) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 4 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - IF (LOC (PTR (1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (4)) THEN - RETURN - ENDIF - - JDIM = 4 - - END FUNCTION FIELD_4D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_5D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - REAL(KIND=JPRB), POINTER :: PTR (:,:,:,:,:) - INTEGER*8 :: ISTRIDE (5) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 5 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - IF (LOC (PTR (1, 1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (4)) THEN - RETURN - ENDIF - - JDIM = 4 - - IF (LOC (PTR (1, 1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (5)) THEN - RETURN - ENDIF - - JDIM = 5 - - END FUNCTION FIELD_5D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_INT2D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - INTEGER(KIND=JPIM), POINTER :: PTR (:,:) - INTEGER*8 :: ISTRIDE (2) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 2 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1)) - LOC (PTR (1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2)) - LOC (PTR (1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - END FUNCTION FIELD_INT2D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_INT3D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - INTEGER(KIND=JPIM), POINTER :: PTR (:,:,:) - INTEGER*8 :: ISTRIDE (3) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 3 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - END FUNCTION FIELD_INT3D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_INT4D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - INTEGER(KIND=JPIM), POINTER :: PTR (:,:,:,:) - INTEGER*8 :: ISTRIDE (4) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 4 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - IF (LOC (PTR (1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (4)) THEN - RETURN - ENDIF - - JDIM = 4 - - END FUNCTION FIELD_INT4D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_INT5D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - INTEGER(KIND=JPIM), POINTER :: PTR (:,:,:,:,:) - INTEGER*8 :: ISTRIDE (5) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 5 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - IF (LOC (PTR (1, 1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (4)) THEN - RETURN - ENDIF - - JDIM = 4 - - IF (LOC (PTR (1, 1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (5)) THEN - RETURN - ENDIF - - JDIM = 5 - - END FUNCTION FIELD_INT5D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_LOG2D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - LOGICAL(KIND=JPLM), POINTER :: PTR (:,:) - INTEGER*8 :: ISTRIDE (2) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 2 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1)) - LOC (PTR (1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2)) - LOC (PTR (1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - END FUNCTION FIELD_LOG2D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_LOG3D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - LOGICAL(KIND=JPLM), POINTER :: PTR (:,:,:) - INTEGER*8 :: ISTRIDE (3) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 3 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2)) - LOC (PTR (1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - END FUNCTION FIELD_LOG3D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_LOG4D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - LOGICAL(KIND=JPLM), POINTER :: PTR (:,:,:,:) - INTEGER*8 :: ISTRIDE (4) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 4 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - IF (LOC (PTR (1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1)) /= ISTRIDE (4)) THEN - RETURN - ENDIF - - JDIM = 4 - - END FUNCTION FIELD_LOG4D_GET_LAST_CONTIGUOUS_DIMENSION - INTEGER (KIND=JPIM) FUNCTION FIELD_LOG5D_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) - LOGICAL(KIND=JPLM), POINTER :: PTR (:,:,:,:,:) - INTEGER*8 :: ISTRIDE (5) - INTEGER (KIND=JPIM) :: J - - ISTRIDE (1) = KIND (PTR) - DO J = 2, 5 - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO - - JDIM = 0 - IF (LOC (PTR (2, 1, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (1)) THEN - RETURN - ENDIF - - JDIM = 1 - - IF (LOC (PTR (1, 2, 1, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (2)) THEN - RETURN - ENDIF - - JDIM = 2 - - IF (LOC (PTR (1, 1, 2, 1, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (3)) THEN - RETURN - ENDIF - - JDIM = 3 - - IF (LOC (PTR (1, 1, 1, 2, 1)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (4)) THEN - RETURN - ENDIF - - JDIM = 4 - - IF (LOC (PTR (1, 1, 1, 1, 2)) - LOC (PTR (1, 1, 1, 1, 1)) /= ISTRIDE (5)) THEN - RETURN - ENDIF - - JDIM = 5 - - END FUNCTION FIELD_LOG5D_GET_LAST_CONTIGUOUS_DIMENSION - -END MODULE FIELD_MODULE diff --git a/src/common/module/field_module_orig.F90 b/src/common/module/field_module_orig.F90 deleted file mode 100644 index e2268014..00000000 --- a/src/common/module/field_module_orig.F90 +++ /dev/null @@ -1,2189 +0,0 @@ -! (C) Copyright 1988- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. - -MODULE FIELD_MODULE - ! The FIELD types provided by this module provide data abstractions that - ! decouple data storage in memory from the data views used in thread-parallel - ! sections of the code. They are intended to thinly wrap ATLAS_FIELD - ! objects and provide additional features that may later be - ! incorporated into Atlas. They can also provide backward-compatibility - ! for non-Atlas execution modes. - -USE PARKIND1, ONLY: JPIM, JPRB -USE OML_MOD, ONLY: OML_MAX_THREADS, OML_MY_THREAD -USE IEEE_ARITHMETIC, ONLY: IEEE_SIGNALING_NAN - -USE CUDAFOR - -use openacc - -use iso_c_binding - -IMPLICIT NONE - -TYPE FIELD_2D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - REAL(KIND=JPRB), POINTER :: VIEW(:) => NULL() - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:) => NULL() - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:) - ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. - -CONTAINS - - PROCEDURE :: CLONE => FIELD_2D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_2D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_2D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_2D_GET_VIEW - PROCEDURE :: FINAL => FIELD_2D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_2D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_2D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_2D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_2D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_2D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_2D_DELETE_DEVICE -END TYPE FIELD_2D - -TYPE FIELD_3D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - REAL(KIND=JPRB), POINTER :: VIEW(:,:) => NULL() - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:,:) => NULL() - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:) - ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX - - ! ! A separate data pointer that can be used to create - ! ! a contiguous chunk of host memory to cleanly map to - ! ! device, should the %DATA pointer be discontiguous. - ! REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) => NULL() - - REAL(KIND=JPRB), DEVICE, ALLOCATABLE :: DEVDATA(:,:,:) - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. - -CONTAINS - - PROCEDURE :: CLONE => FIELD_3D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_3D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_3D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_3D_GET_VIEW - PROCEDURE :: FINAL => FIELD_3D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_3D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_3D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_3D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_3D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_3D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_3D_DELETE_DEVICE -END TYPE FIELD_3D - -TYPE FIELD_4D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - REAL(KIND=JPRB), POINTER :: VIEW(:,:,:) => NULL() - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) => NULL() - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA(:,:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA(:,:,:,:) - ! REAL(KIND=JPRB), ALLOCATABLE, PINNED :: DATA(:,:,:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. - -CONTAINS - - PROCEDURE :: CLONE => FIELD_4D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_4D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_4D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_4D_GET_VIEW - PROCEDURE :: FINAL => FIELD_4D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_4D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_4D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_4D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_4D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_4D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_4D_DELETE_DEVICE -END TYPE FIELD_4D - - -TYPE FIELD_INT2D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - INTEGER(KIND=JPIM), POINTER :: VIEW(:) => NULL() - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - INTEGER(KIND=JPIM), POINTER :: PTR(:,:) => NULL() - ! INTEGER(KIND=JPIM), ALLOCATABLE :: DATA(:,:) - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DATA(:,:) - ! INTEGER(KIND=JPIM), ALLOCATABLE, PINNED :: DATA(:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. - -CONTAINS - - PROCEDURE :: CLONE => FIELD_INT2D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_INT2D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_INT2D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_INT2D_GET_VIEW - PROCEDURE :: FINAL => FIELD_INT2D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_INT2D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_INT2D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_INT2D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_INT2D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_INT2D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_INT2D_DELETE_DEVICE -END TYPE FIELD_INT2D - - -TYPE FIELD_LOG2D - ! A FIELD encapsulates a single multi-dimensional array and can - ! provide block-indexed "views" of the data for automating the - ! allocation and parallel iterration of NPROMA blocks. - - ! The data view to be used in thread-parallel sections - ! - ! The underlying view pointer is of rank-1, since we always - ! the horizontal component as a single dimension. - LOGICAL, POINTER :: VIEW(:) => NULL() - - ! TODO: Atlas-based field data storage field - ! TODO: Do we still need to use pointers here? - ! TYPE(ATLAS_FIELD), POINTER :: DATA - - ! Storage pointer for non-Atlas backward-compatibility mode - ! - ! The underlying storage pointer has the rank as the dimension, - ! where the innermost dimension represents the horizontal and - ! the outermost one is the block index. - LOGICAL, POINTER :: PTR(:,:) => NULL() - ! LOGICAL, ALLOCATABLE :: DATA(:,:) - LOGICAL, POINTER, CONTIGUOUS :: DATA(:,:) - ! LOGICAL, ALLOCATABLE, PINNED :: DATA(:,:) - - ! For wrapping discontiguous fields in co-allocated storage - ! arrays (eg. GFL/GMV) also store a CONTIGUOUS base pointer - ! and integer index, to allow block pointer extraction that - ! conforms with CUDA device pointers in PGI. - LOGICAL, POINTER, CONTIGUOUS :: BASE_PTR(:,:,:) => NULL() - INTEGER(KIND=JPIM) :: FIDX - - ! A separate data pointer that can be used to create - ! a contiguous chunk of host memory to cleanly map to - ! device, should the %DATA pointer be discontiguous. - LOGICAL, POINTER, CONTIGUOUS :: DEVPTR(:,:) => NULL() - - ! Number of blocks used in the data layout - INTEGER :: NBLOCKS - - ! Flag indicating whether this field stores real data - LOGICAL :: ACTIVE = .FALSE. - ! Flag indicating the use a single block-buffer per thread - LOGICAL :: THREAD_BUFFER = .FALSE. - ! Flag indicating whether we own the allocated base array - LOGICAL :: OWNED = .TRUE. - ! Flag indicating whether latest data currently resides on device - LOGICAL :: ON_DEVICE = .FALSE. - -CONTAINS - - PROCEDURE :: CLONE => FIELD_LOG2D_CLONE - PROCEDURE :: UPDATE_VIEW => FIELD_LOG2D_UPDATE_VIEW - PROCEDURE :: EXTRACT_VIEW => FIELD_LOG2D_EXTRACT_VIEW - PROCEDURE :: GET_VIEW => FIELD_LOG2D_GET_VIEW - PROCEDURE :: FINAL => FIELD_LOG2D_FINAL - - ! GPU-specific device data transfer API - PROCEDURE :: CREATE_DEVICE => FIELD_LOG2D_CREATE_DEVICE - PROCEDURE :: UPDATE_DEVICE => FIELD_LOG2D_UPDATE_DEVICE - PROCEDURE :: UPDATE_HOST => FIELD_LOG2D_UPDATE_HOST - PROCEDURE :: ENSURE_DEVICE => FIELD_LOG2D_ENSURE_DEVICE - PROCEDURE :: ENSURE_HOST => FIELD_LOG2D_ENSURE_HOST - PROCEDURE :: DELETE_DEVICE => FIELD_LOG2D_DELETE_DEVICE -END TYPE FIELD_LOG2D - - -TYPE FIELD_2D_PTR - ! Struct to hold references to field objects - TYPE(FIELD_2D), POINTER :: PTR => NULL() -END TYPE FIELD_2D_PTR - -TYPE FIELD_2D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:) => NULL() -END TYPE FIELD_2D_VIEW -TYPE FIELD_3D_PTR - ! Struct to hold references to field objects - TYPE(FIELD_3D), POINTER :: PTR => NULL() -END TYPE FIELD_3D_PTR - -TYPE FIELD_3D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:) => NULL() -END TYPE FIELD_3D_VIEW -TYPE FIELD_4D_PTR - ! Struct to hold references to field objects - TYPE(FIELD_4D), POINTER :: PTR => NULL() -END TYPE FIELD_4D_PTR - -TYPE FIELD_4D_VIEW - ! Struct to hold array views, so we can make arrays of them - REAL(KIND=JPRB), POINTER :: P(:,:,:) => NULL() -END TYPE FIELD_4D_VIEW - - -INTERFACE FIELD_2D - MODULE PROCEDURE :: FIELD_2D_WRAP - MODULE PROCEDURE :: FIELD_2D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_2D_EMPTY - MODULE PROCEDURE :: FIELD_2D_ALLOCATE -END INTERFACE - -INTERFACE FIELD_3D - MODULE PROCEDURE :: FIELD_3D_WRAP - MODULE PROCEDURE :: FIELD_3D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_3D_EMPTY - MODULE PROCEDURE :: FIELD_3D_ALLOCATE -END INTERFACE - -INTERFACE FIELD_4D - MODULE PROCEDURE :: FIELD_4D_WRAP - MODULE PROCEDURE :: FIELD_4D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_4D_EMPTY - MODULE PROCEDURE :: FIELD_4D_ALLOCATE -END INTERFACE - - -INTERFACE FIELD_INT2D - MODULE PROCEDURE :: FIELD_INT2D_WRAP - MODULE PROCEDURE :: FIELD_INT2D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_INT2D_EMPTY - MODULE PROCEDURE :: FIELD_INT2D_ALLOCATE -END INTERFACE - - -INTERFACE FIELD_LOG2D - MODULE PROCEDURE :: FIELD_LOG2D_WRAP - MODULE PROCEDURE :: FIELD_LOG2D_WRAP_PACKED - ! MODULE PROCEDURE :: FIELD_LOG2D_EMPTY - MODULE PROCEDURE :: FIELD_LOG2D_ALLOCATE -END INTERFACE - - -INTERFACE FILL_BUFFER - MODULE PROCEDURE :: FILL_BUFFER_2D, FILL_BUFFER_3D, FILL_BUFFER_4D - MODULE PROCEDURE :: FILL_BUFFER_INT2D, FILL_BUFFER_LOG2D -END INTERFACE FILL_BUFFER - -INTERFACE FIELD_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_2D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_3D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_4D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_CREATE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_CREATE_DEVICE -END INTERFACE FIELD_CREATE_DEVICE - -INTERFACE FIELD_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_2D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_3D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_4D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_UPDATE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_UPDATE_DEVICE -END INTERFACE FIELD_UPDATE_DEVICE - -INTERFACE FIELD_UPDATE_HOST - MODULE PROCEDURE :: FIELD_2D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_3D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_4D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_INT2D_UPDATE_HOST - MODULE PROCEDURE :: FIELD_LOG2D_UPDATE_HOST -END INTERFACE FIELD_UPDATE_HOST - -INTERFACE FIELD_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_2D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_3D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_4D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_DELETE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_DELETE_DEVICE -END INTERFACE FIELD_DELETE_DEVICE - -INTERFACE GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_2D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_3D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_4D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_INT2D_GET_DEVICE_DATA - MODULE PROCEDURE :: FIELD_LOG2D_GET_DEVICE_DATA -END INTERFACE GET_DEVICE_DATA - -INTERFACE FIELD_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_2D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_3D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_4D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_INT2D_ENSURE_DEVICE - MODULE PROCEDURE :: FIELD_LOG2D_ENSURE_DEVICE -END INTERFACE FIELD_ENSURE_DEVICE - -INTERFACE FIELD_ENSURE_HOST - MODULE PROCEDURE :: FIELD_2D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_3D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_4D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_INT2D_ENSURE_HOST - MODULE PROCEDURE :: FIELD_LOG2D_ENSURE_HOST -END INTERFACE FIELD_ENSURE_HOST - -CONTAINS - - function malloc_host_pinned_2d(shape, nblocks) result(ptr) - integer(kind=jpim), intent(in) :: shape(1) - integer(kind=jpim), intent(in), optional :: nblocks - real(kind=jprb), pointer, contiguous :: ptr(:,:) - - integer(kind=jpim) :: istat, arrsize - type(c_ptr) :: hptr - - arrsize = shape(1) * nblocks * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" - call c_f_pointer(hptr, ptr, [shape(1), nblocks] ) - end function malloc_host_pinned_2d - - function malloc_host_pinned_3d(shape, nblocks) result(ptr) - integer(kind=jpim), intent(in) :: shape(2) - integer(kind=jpim), intent(in), optional :: nblocks - real(kind=jprb), pointer, contiguous :: ptr(:,:,:) - - integer(kind=jpim) :: istat, arrsize - type(c_ptr) :: hptr - - arrsize = shape(1) * shape(2) * nblocks * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" - call c_f_pointer(hptr, ptr, [shape(1), shape(2), nblocks] ) - end function malloc_host_pinned_3d - - function malloc_host_pinned_4d(shape, nblocks) result(ptr) - integer(kind=jpim), intent(in) :: shape(3) - integer(kind=jpim), intent(in), optional :: nblocks - real(kind=jprb), pointer, contiguous :: ptr(:,:,:,:) - - integer(kind=jpim) :: istat, arrsize - type(c_ptr) :: hptr - - arrsize = shape(1) * shape(2) * shape(3) * nblocks * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - if (istat /= 0) print *, "ERROR: Failed to allocate pinned host memory!" - call c_f_pointer(hptr, ptr, [shape(1), shape(2), shape(3), nblocks] ) - end function malloc_host_pinned_4d - - - SUBROUTINE FILL_BUFFER_2D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: IDX - - IDX = INDEX+1 - BUFFER(IDX:) = BUFFER(INDEX) - END SUBROUTINE FILL_BUFFER_2D - - SUBROUTINE FILL_BUFFER_3D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: I, IDX - - IDX = INDEX+1 - DO I=1, SIZE(BUFFER, 2) - BUFFER(IDX:,I) = BUFFER(INDEX,I) - END DO - END SUBROUTINE FILL_BUFFER_3D - - SUBROUTINE FILL_BUFFER_4D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: BUFFER(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: I, J, IDX - - IDX = INDEX+1 - DO I=1, SIZE(BUFFER, 2) - DO J=1, SIZE(BUFFER, 3) - BUFFER(IDX:,I,J) = BUFFER(INDEX,I,J) - END DO - END DO - END SUBROUTINE FILL_BUFFER_4D - - SUBROUTINE FILL_BUFFER_INT2D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: BUFFER(:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: IDX - - IDX = INDEX+1 - BUFFER(IDX:) = BUFFER(INDEX) - END SUBROUTINE FILL_BUFFER_INT2D - - SUBROUTINE FILL_BUFFER_LOG2D(BUFFER, INDEX) - ! Utility routine to fill data buffers (views) - LOGICAL, POINTER, INTENT(INOUT) :: BUFFER(:) - INTEGER(KIND=JPIM), INTENT(IN) :: INDEX - INTEGER(KIND=JPIM) :: IDX - - IDX = INDEX+1 - BUFFER(IDX:) = BUFFER(INDEX) - END SUBROUTINE FILL_BUFFER_LOG2D - - FUNCTION FIELD_2D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_2D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_2D_EMPTY - - FUNCTION FIELD_3D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_3D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(2) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1),SHAPE(2))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_3D_EMPTY - - FUNCTION FIELD_4D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_4D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(3) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1),SHAPE(2),SHAPE(3))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_4D_EMPTY - - FUNCTION FIELD_INT2D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_INT2D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_INT2D_EMPTY - - FUNCTION FIELD_LOG2D_EMPTY(SHAPE) RESULT(SELF) - ! Create FIELD object by wrapping existing data - ! - ! If a SHAPE is provided, a single empty buffer block-sized buffer - ! will be allocated under %VIEW and used by all threads in a - ! thread-parallel region to avoid segfault when dereferencing NULL - ! pointers. Otherwise %DATA and %VIEW will always be unassociated. - TYPE(FIELD_LOG2D) :: SELF - INTEGER(KIND=JPIM), OPTIONAL, INTENT(IN) :: SHAPE(1) - - SELF%PTR => NULL() - IF (PRESENT(SHAPE)) THEN - ALLOCATE(SELF%VIEW(SHAPE(1))) - END IF - SELF%ACTIVE = .FALSE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = 0 - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_LOG2D_EMPTY - - FUNCTION FIELD_2D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:) - - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_2D_WRAP - - FUNCTION FIELD_3D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) - - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 3) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_3D_WRAP - - FUNCTION FIELD_4D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) - - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 4) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_4D_WRAP - - FUNCTION FIELD_INT2D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:) - - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_INT2D_WRAP - - FUNCTION FIELD_LOG2D_WRAP(DATA) RESULT(SELF) - ! Create FIELD object by wrapping existing data - TYPE(FIELD_LOG2D), TARGET :: SELF - LOGICAL, TARGET, INTENT(IN) :: DATA(:,:) - - SELF%PTR => DATA - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_LOG2D_WRAP - - FUNCTION FIELD_2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - SELF%PTR => DATA(:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_2D_WRAP_PACKED - - FUNCTION FIELD_3D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - integer(kind=jpim) :: arrsize, istat - type(c_ptr) :: hptr - - SELF%PTR => DATA(:,:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 3) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - - ! arrsize = SIZE(SELF%PTR, 1) * SIZE(SELF%PTR, 2) * SELF%NBLOCKS * sizeof(1.0_JPRB) - ! istat = cudaSetDeviceFlags(cudadevicemaphost) - ! istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - ! call c_f_pointer(hptr, self%data, [SIZE(SELF%PTR, 1), SIZE(SELF%PTR, 2), SELF%NBLOCKS] ) - - END FUNCTION FIELD_3D_WRAP_PACKED - - FUNCTION FIELD_4D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - SELF%PTR => DATA(:,:,:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 4) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_4D_WRAP_PACKED - - FUNCTION FIELD_INT2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - SELF%PTR => DATA(:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_INT2D_WRAP_PACKED - - FUNCTION FIELD_LOG2D_WRAP_PACKED(DATA, IDX) RESULT(SELF) - ! Create FIELD object packed in a multi-field buffer by storing a - ! contiguous pointer to existing data and an index. - TYPE(FIELD_LOG2D), TARGET :: SELF - LOGICAL, TARGET, INTENT(IN) :: DATA(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - SELF%PTR => DATA(:,IDX,:) - SELF%ACTIVE = .TRUE. - SELF%THREAD_BUFFER = .FALSE. - SELF%OWNED = .FALSE. - SELF%NBLOCKS = SIZE(SELF%PTR, 2) - SELF%BASE_PTR => DATA - SELF%FIDX = IDX - END FUNCTION FIELD_LOG2D_WRAP_PACKED - - FUNCTION FIELD_2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK - - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_2D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF - - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) - - arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) - - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_2D_ALLOCATE - - FUNCTION FIELD_3D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK - - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_3D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF - - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),SHAPE(2),NBLK)) - - arrsize = SHAPE(1) * SHAPE(2) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), SHAPE(2), NBLK] ) - - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 3) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_3D_ALLOCATE - - FUNCTION FIELD_4D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_4D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK - - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_4D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF - - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),SHAPE(2),SHAPE(3),NBLK)) - - arrsize = SHAPE(1) * SHAPE(2) * SHAPE(3) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), SHAPE(2), SHAPE(3), NBLK] ) - - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 4) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_4D_ALLOCATE - - FUNCTION FIELD_INT2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK - - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_INT2D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF - - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) - - arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) - - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_INT2D_ALLOCATE - - FUNCTION FIELD_LOG2D_ALLOCATE(SHAPE, NBLOCKS, PERSISTENT) RESULT(SELF) - ! Create FIELD object by explicitly allocating new data - ! - ! Please note that SHAPE is the conceptual shape without the block dimension - TYPE(FIELD_LOG2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1) - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NBLOCKS - LOGICAL, INTENT(IN), OPTIONAL :: PERSISTENT - INTEGER(KIND=JPIM) :: NBLK - - INTEGER(KIND=JPIM) :: istat, arrsize - type(c_ptr) :: hptr - - ! By default we allocate thread-local temporaries - SELF%THREAD_BUFFER = .TRUE. - NBLK = OML_MAX_THREADS() - - IF (PRESENT(PERSISTENT)) THEN - IF (PERSISTENT) THEN - ! Adjust outer dim for full-sized persistent blocked arrays - IF (.NOT. PRESENT(NBLOCKS)) CALL & - & ABOR1('FIELD_LOG2D_ALLOCATE : NBLOCKS not given for persistent allocation!') - SELF%THREAD_BUFFER = .FALSE. - NBLK = NBLOCKS - END IF - END IF - - ! Allocate storage array and store metadata - ! ALLOCATE(SELF%DATA(SHAPE(1),NBLK)) - - arrsize = SHAPE(1) * NBLK * sizeof(1.0_JPRB) - istat = cudaSetDeviceFlags(cudadevicemaphost) - istat = cudaHostAlloc(hptr, arrsize, cudaHostAllocMapped) - call c_f_pointer(hptr, self%data, [SHAPE(1), NBLK] ) - - SELF%PTR => SELF%DATA - SELF%ACTIVE = .TRUE. - SELF%OWNED = .TRUE. - SELF%NBLOCKS = SIZE(SELF%DATA, 2) - SELF%BASE_PTR => NULL() - SELF%FIDX = -1 - END FUNCTION FIELD_LOG2D_ALLOCATE - - FUNCTION FIELD_2D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_2D) :: SELF - TYPE(FIELD_2D), POINTER :: NEWOBJ - - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_2D_CLONE - - FUNCTION FIELD_3D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_3D) :: SELF - TYPE(FIELD_3D), POINTER :: NEWOBJ - - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_3D_CLONE - - FUNCTION FIELD_4D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_4D) :: SELF - TYPE(FIELD_4D), POINTER :: NEWOBJ - - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_4D_CLONE - - FUNCTION FIELD_INT2D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_INT2D) :: SELF - TYPE(FIELD_INT2D), POINTER :: NEWOBJ - - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_INT2D_CLONE - - FUNCTION FIELD_LOG2D_CLONE(SELF) RESULT(NEWOBJ) - ! Clone (deep-copy) this FIELD object, keeping the DATA pointer - ! intact, but replicating view pointers. - CLASS(FIELD_LOG2D) :: SELF - TYPE(FIELD_LOG2D), POINTER :: NEWOBJ - - ALLOCATE(NEWOBJ) - ! For owned storage data, re-allocate but do not copy data over - IF (SELF%OWNED) THEN - ALLOCATE(NEWOBJ%DATA, MOLD=SELF%DATA) - NEWOBJ%PTR => NEWOBJ%DATA - ELSE - NEWOBJ%PTR => SELF%PTR - END IF - NEWOBJ%VIEW => NULL() - NEWOBJ%NBLOCKS = SELF%NBLOCKS - NEWOBJ%THREAD_BUFFER = SELF%THREAD_BUFFER - NEWOBJ%OWNED = .FALSE. - END FUNCTION FIELD_LOG2D_CLONE - - - SUBROUTINE FIELD_2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,IDX) - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_2D_UPDATE_VIEW - - SUBROUTINE FIELD_3D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,:,IDX) - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_3D_UPDATE_VIEW - - SUBROUTINE FIELD_4D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_4D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,:,:,IDX) - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:,:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_4D_UPDATE_VIEW - - SUBROUTINE FIELD_INT2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,IDX) - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:) = 0.0_JPIM - END IF - END SUBROUTINE FIELD_INT2D_UPDATE_VIEW - - - SUBROUTINE FIELD_LOG2D_UPDATE_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Sets the view pointer FIELD%MP to the block of the given index - CLASS(FIELD_LOG2D), TARGET :: SELF - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - SELF%VIEW => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - SELF%VIEW => SELF%PTR(:,IDX) - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(SELF%VIEW, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) SELF%VIEW(:) = .FALSE. - END IF - END SUBROUTINE FIELD_LOG2D_UPDATE_VIEW - - SUBROUTINE FIELD_2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_2D_EXTRACT_VIEW - - SUBROUTINE FIELD_3D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_3D_EXTRACT_VIEW - - SUBROUTINE FIELD_4D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, INTENT(INOUT) :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB - END IF - END SUBROUTINE FIELD_4D_EXTRACT_VIEW - - SUBROUTINE FIELD_INT2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), POINTER, INTENT(INOUT) :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPIM - END IF - END SUBROUTINE FIELD_INT2D_EXTRACT_VIEW - - SUBROUTINE FIELD_LOG2D_EXTRACT_VIEW(SELF, VIEW_PTR, BLOCK_INDEX, BLOCK_SIZE, ZERO) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_LOG2D), TARGET :: SELF - LOGICAL, POINTER, INTENT(INOUT) :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE) .AND. BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = .FALSE. - END IF - END SUBROUTINE FIELD_LOG2D_EXTRACT_VIEW - - FUNCTION FIELD_2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPRB - END IF - END FUNCTION FIELD_2D_GET_VIEW - - FUNCTION FIELD_3D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_3D_GET_VIEW - - FUNCTION FIELD_4D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), POINTER :: VIEW_PTR(:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,:,:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,:,:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:,:,:) = 0.0_JPRB - END IF - END FUNCTION FIELD_4D_GET_VIEW - - FUNCTION FIELD_INT2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = 0.0_JPIM - END IF - END FUNCTION FIELD_INT2D_GET_VIEW - - - FUNCTION FIELD_LOG2D_GET_VIEW(SELF, BLOCK_INDEX, BLOCK_SIZE, ZERO) RESULT(VIEW_PTR) - ! Updates internal view and exports it to an external pointer - CLASS(FIELD_LOG2D), TARGET :: SELF - LOGICAL, POINTER :: VIEW_PTR(:) - INTEGER(KIND=JPIM), INTENT(IN) :: BLOCK_INDEX - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: BLOCK_SIZE - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: IDX - - IDX = BLOCK_INDEX - IF (SELF%THREAD_BUFFER) IDX = OML_MY_THREAD() - IF (SELF%ACTIVE .AND. SELF%OWNED) THEN - VIEW_PTR => SELF%DATA(:,IDX) - ELSEIF (SELF%ACTIVE .AND. .NOT. SELF%OWNED) THEN - VIEW_PTR => SELF%PTR(:,IDX) - ELSE - VIEW_PTR => SELF%VIEW ! Set to NaN'd field buffer - END IF - - IF (PRESENT(BLOCK_SIZE)) THEN - IF (BLOCK_INDEX == SELF%NBLOCKS) THEN - ! Fill the the buffer by replicating the last entry - CALL FILL_BUFFER(VIEW_PTR, INDEX=BLOCK_SIZE) - END IF - END IF - - IF (PRESENT(ZERO)) THEN - IF (ZERO) VIEW_PTR(:) = .FALSE. - END IF - END FUNCTION FIELD_LOG2D_GET_VIEW - - - SUBROUTINE FIELD_2D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_2D), TARGET :: SELF - - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_2D_CREATE_DEVICE - - SUBROUTINE FIELD_3D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM) :: ARRSIZE - - ARRSIZE = SIZE(SELF%PTR) * SIZEOF(1.0_JPRB) - ALLOCATE(SELF%DEVDATA, MOLD=SELF%PTR) - CALL ACC_MAP_DATA(SELF%PTR, SELF%DEVDATA, ARRSIZE) - - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_3D_CREATE_DEVICE - - SUBROUTINE FIELD_4D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_4D), TARGET :: SELF - - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_4D_CREATE_DEVICE - - SUBROUTINE FIELD_INT2D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_INT2D), TARGET :: SELF - - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_INT2D_CREATE_DEVICE - - SUBROUTINE FIELD_LOG2D_CREATE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_LOG2D), TARGET :: SELF - - SELF%DEVPTR => SELF%DATA - !$acc enter data create(SELF%DATA) - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_LOG2D_CREATE_DEVICE - - FUNCTION FIELD_2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_2D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:) - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_2D_GET_DEVICE_DATA - - FUNCTION FIELD_3D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_3D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:) - - type(c_ptr) :: hptr - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - hptr = acc_hostptr(self%devdata) - call c_f_pointer(hptr, devptr, shape(self%devdata)) - END IF - END FUNCTION FIELD_3D_GET_DEVICE_DATA - - FUNCTION FIELD_4D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_4D), TARGET :: SELF - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DEVPTR(:,:,:,:) - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_4D_GET_DEVICE_DATA - - FUNCTION FIELD_INT2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM), POINTER, CONTIGUOUS :: DEVPTR(:,:) - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_INT2D_GET_DEVICE_DATA - - FUNCTION FIELD_LOG2D_GET_DEVICE_DATA(SELF) RESULT(DEVPTR) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_LOG2D), TARGET :: SELF - LOGICAL, POINTER, CONTIGUOUS :: DEVPTR(:,:) - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - - IF (SELF%OWNED) THEN - DEVPTR => SELF%DATA - ELSE - DEVPTR => SELF%DEVPTR - END IF - END FUNCTION FIELD_LOG2D_GET_DEVICE_DATA - - SUBROUTINE FIELD_2D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_2D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_2D_UPDATE_DEVICE - - SUBROUTINE FIELD_3D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_3D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - INTEGER(KIND=JPIM) :: istat, arrsize, blksize - type(c_ptr) :: hptr - integer(kind=jpim) :: shape(3) - - logical :: pres - - arrsize = size(self%ptr) * sizeof(1.0_JPRB) - blksize = arrsize / self%nblocks - ALLOCATE(SELF%DEVDATA, mold=SELF%PTR) - - IF (SELF%OWNED) THEN - call acc_map_data(self%data, self%devdata, arrsize) - call acc_memcpy_to_device(self%devdata(:,:,:), self%data(:,:,:), arrsize) - - ELSE - ! TODO: This is a dirty trick to fool the OpenACC runtime! - ! We allocate the associated data array (full size), so that we can - ! add it to the OpenACC host-device map (it's contiguous!) - ! Then, we copy the data in a strided fashio from the discontiguous pointer. - ALLOCATE(SELF%DATA, MOLD=SELF%PTR) - call acc_map_data(self%data, self%devdata, arrsize) - DO IBL=1, SELF%NBLOCKS - call acc_memcpy_to_device(self%devdata(:,:,ibl), self%base_ptr(:,:,self%fidx,ibl), blksize) - END DO - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_3D_UPDATE_DEVICE - - SUBROUTINE FIELD_4D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_4D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:,:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,:,:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_4D_UPDATE_DEVICE - - SUBROUTINE FIELD_INT2D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_INT2D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_INT2D_UPDATE_DEVICE - - SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE(SELF) - ! Create a copy of this field on device and copy data over - CLASS(FIELD_LOG2D), TARGET :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc enter data create(SELF%DATA) - !$acc update device(SELF%DATA(:,:)) - !$acc wait - SELF%DEVPTR => SELF%DATA - ELSE - ALLOCATE(SELF%DEVPTR, SOURCE=SELF%PTR) - !$acc enter data create(SELF%DEVPTR) - DO IBL=1, SELF%NBLOCKS - !$acc update device(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - END IF - SELF%ON_DEVICE = .TRUE. - END SUBROUTINE FIELD_LOG2D_UPDATE_DEVICE - - SUBROUTINE FIELD_2D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_2D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - !$acc update host(SELF%DATA(:,:)) - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:) = SELF%DEVPTR(:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_2D_UPDATE_HOST - - SUBROUTINE FIELD_3D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_3D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - INTEGER(KIND=JPIM) :: istat, arrsize, blksize - type(c_ptr) :: hptr - - arrsize = size(self%ptr) * sizeof(1.0_JPRB) - blksize = arrsize / self%nblocks - - IF (SELF%OWNED) THEN - call acc_memcpy_from_device(self%data(:,:,:), self%devdata(:,:,:), arrsize) - call acc_unmap_data(self%data) - - ELSE - ! call acc_memcpy_from_device(self%data(:,:,:), self%devdata(:,:,:), arrsize) - DO IBL=1, SELF%NBLOCKS - ! self%base_ptr(:,:,self%fidx,ibl) = self%data(:,:,ibl) - - ! call acc_memcpy_from_device(self%ptr(:,:,ibl), self%devdata(:,:,ibl), blksize) - call acc_memcpy_from_device(self%base_ptr(:,:,self%fidx,ibl), self%devdata(:,:,ibl), blksize) - END DO - call acc_unmap_data(self%data) - DEALLOCATE(SELF%DATA) - END IF - - DEALLOCATE(SELF%DEVDATA) - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_3D_UPDATE_HOST - - SUBROUTINE FIELD_4D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_4D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DATA(:,:,:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,:,:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:,:,:) = SELF%DEVPTR(:,:,:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_4D_UPDATE_HOST - - SUBROUTINE FIELD_INT2D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_INT2D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DATA(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:) = SELF%DEVPTR(:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_INT2D_UPDATE_HOST - - SUBROUTINE FIELD_LOG2D_UPDATE_HOST(SELF) - ! Synchronize device data back to host - CLASS(FIELD_LOG2D) :: SELF - INTEGER(KIND=JPIM) :: IBL - - IF (SELF%OWNED) THEN - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DATA(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DATA) - ELSE - DO IBL=1, SELF%NBLOCKS - !$acc update host(SELF%DEVPTR(:,IBL)) - END DO - !$acc wait - !$acc exit data delete(SELF%DEVPTR) - SELF%PTR(:,:) = SELF%DEVPTR(:,:) - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_LOG2D_UPDATE_HOST - - SUBROUTINE FIELD_2D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_2D), TARGET :: SELF - - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_2D_DELETE_DEVICE - - SUBROUTINE FIELD_3D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_3D), TARGET :: SELF - - IF (SELF%OWNED) THEN - CALL ACC_UNMAP_DATA(SELF%DATA) - ELSE - CALL ACC_UNMAP_DATA(SELF%PTR) - END IF - DEALLOCATE(SELF%DEVDATA) - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_3D_DELETE_DEVICE - - SUBROUTINE FIELD_4D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_4D), TARGET :: SELF - - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_4D_DELETE_DEVICE - - SUBROUTINE FIELD_INT2D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_INT2D), TARGET :: SELF - - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_INT2D_DELETE_DEVICE - - SUBROUTINE FIELD_LOG2D_DELETE_DEVICE(SELF) - ! Initialize a copy of this field on GPU device - CLASS(FIELD_LOG2D), TARGET :: SELF - - !$acc exit data delete(SELF%DEVPTR) - IF (SELF%OWNED) THEN - NULLIFY(SELF%DEVPTR) - ELSE - DEALLOCATE(SELF%DEVPTR) - END IF - SELF%ON_DEVICE = .FALSE. - END SUBROUTINE FIELD_LOG2D_DELETE_DEVICE - - SUBROUTINE FIELD_2D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_2D), TARGET :: SELF - - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_2D_ENSURE_HOST - - SUBROUTINE FIELD_3D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_3D), TARGET :: SELF - - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_3D_ENSURE_HOST - - SUBROUTINE FIELD_4D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_4D), TARGET :: SELF - - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_4D_ENSURE_HOST - - SUBROUTINE FIELD_INT2D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_INT2D), TARGET :: SELF - - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_INT2D_ENSURE_HOST - - SUBROUTINE FIELD_LOG2D_ENSURE_HOST(SELF) - ! Ensure that field has been moved back to host - CLASS(FIELD_LOG2D), TARGET :: SELF - - IF (SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_HOST() - END IF - END SUBROUTINE FIELD_LOG2D_ENSURE_HOST - - SUBROUTINE FIELD_2D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_2D), TARGET :: SELF - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_2D_ENSURE_DEVICE - - SUBROUTINE FIELD_3D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_3D), TARGET :: SELF - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_3D_ENSURE_DEVICE - - SUBROUTINE FIELD_4D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_4D), TARGET :: SELF - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_4D_ENSURE_DEVICE - - SUBROUTINE FIELD_INT2D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_INT2D), TARGET :: SELF - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_INT2D_ENSURE_DEVICE - - SUBROUTINE FIELD_LOG2D_ENSURE_DEVICE(SELF) - ! Ensure that field has been moved over to device - CLASS(FIELD_LOG2D), TARGET :: SELF - - IF (.NOT. SELF%ON_DEVICE) THEN - CALL SELF%UPDATE_DEVICE() - END IF - END SUBROUTINE FIELD_LOG2D_ENSURE_DEVICE - - SUBROUTINE FIELD_2D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_2D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_2D_FINAL - - SUBROUTINE FIELD_3D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_3D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_3D_FINAL - - SUBROUTINE FIELD_4D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_4D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_4D_FINAL - - SUBROUTINE FIELD_INT2D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_INT2D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_INT2D_FINAL - - SUBROUTINE FIELD_LOG2D_FINAL(SELF) - ! Finalizes field and dealloactes owned data - CLASS(FIELD_LOG2D) :: SELF - IF (SELF%OWNED) THEN - DEALLOCATE(SELF%DATA) - END IF - NULLIFY(SELF%PTR) - NULLIFY(SELF%VIEW) - END SUBROUTINE FIELD_LOG2D_FINAL - -END MODULE FIELD_MODULE diff --git a/src/common/module/yomphyder.F90 b/src/common/module/yomphyder.F90 index afb1d454..811e1e64 100644 --- a/src/common/module/yomphyder.F90 +++ b/src/common/module/yomphyder.F90 @@ -10,7 +10,7 @@ module yomphyder USE PARKIND1, ONLY : JPIM, JPRB -#ifdef USE_FIELD_API +#ifdef USE_FIELD_API USE FIELD_MODULE, ONLY: FIELD_3D, FIELD_4D #endif From 86fde0a4fde8ae9de41a797ba1db04a4d261a121 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 8 Dec 2022 14:14:35 +0100 Subject: [PATCH 003/174] Made field_api build optional --- bundle.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bundle.yml b/bundle.yml index 0784b76e..b648c75c 100644 --- a/bundle.yml +++ b/bundle.yml @@ -42,6 +42,7 @@ projects : optional: true require : ecbuild cmake : > + BUILD_field_api=OFF ENABLE_FIELD_API_TESTS=OFF ENABLE_FIELD_API_FIAT_BUILD=OFF FIELD_API_UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module @@ -77,6 +78,7 @@ options : ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON ENABLE_CLOUDSC_GPU_SCC_FIELD=ON ENABLE_FIELD_API=ON + BUILD_field_api=ON - with-mpi : help : Enable MPI-parallel kernel From 1a4f976705dd99112c3d84f94571396f178cb9bd Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 24 Jan 2023 17:03:24 +0100 Subject: [PATCH 004/174] Removing commented out code --- src/common/module/cloudsc_field_state_mod.F90 | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/src/common/module/cloudsc_field_state_mod.F90 b/src/common/module/cloudsc_field_state_mod.F90 index 23b9a994..18f9822e 100644 --- a/src/common/module/cloudsc_field_state_mod.F90 +++ b/src/common/module/cloudsc_field_state_mod.F90 @@ -38,9 +38,6 @@ MODULE CLOUDSC_FIELD_STATE_MOD REAL(KIND=JPRB), ALLOCATABLE :: B_TMP(:,:,:,:) REAL(KIND=JPRB), ALLOCATABLE :: B_LOC(:,:,:,:) - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA_RDONLY(:,:,:,:) - ! REAL(KIND=JPRB), ALLOCATABLE :: DATA_RWONLY(:,:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA_RDONLY(:,:,:,:) REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA_RWONLY(:,:,:,:) @@ -53,7 +50,6 @@ MODULE CLOUDSC_FIELD_STATE_MOD & F_PLU, F_PLUDE, F_PSNDE, F_PMFU, F_PMFD, F_PA, F_PSUPSAT, F_PCOVPTOT CLASS(FIELD_3D), POINTER :: F_PFSQLF, F_PFSQIF, F_PFCQLNG, F_PFCQNNG, F_PFSQRF, F_PFSQSF, F_PFCQRNG, & & F_PFCQSNG, F_PFSQLTUR, F_PFSQITUR, F_PFPLSL, F_PFPLSN, F_PFHPSL, F_PFHPSN -! CLASS(FIELD_4D), POINTER :: F_PCLV, F_TENDENCY_TMP, F_TENDENCY_LOC CLASS(FIELD_4D), POINTER :: F_PCLV CONTAINS PROCEDURE :: LOAD => CLOUDSC_FIELD_STATE_LOAD @@ -181,18 +177,8 @@ FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END IF END FUNCTION CREATE_FIELD_ALLOCATE_4D -! FUNCTION CREATE_FIELD_WRAP_PACKED_2D(DATA, IDX) RESULT(FIELD_PTR) -! ! Create a single 2D field with implicit blocking dimension by wrapping existing data -! TYPE(FIELD_2D), POINTER :: FIELD_PTR -! REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:) -! INTEGER(KIND=JPIM), INTENT(IN) :: IDX -! -! ALLOCATE(FIELD_PTR) -! FIELD_PTR = FIELD_2D(DATA=DATA, IDX=IDX) -! END FUNCTION CREATE_FIELD_WRAP_PACKED_2D - FUNCTION CREATE_FIELD_WRAP_PACKED_3D(DATA, IDX) RESULT(FIELD_PTR) - ! Create a single 1D field with implicit blocking dimension by wrapping existing data + ! Create a single 3D field with implicit blocking dimension by wrapping existing data CLASS(FIELD_3D_WRAPPER_PACKED), POINTER :: FIELD_PTR REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) INTEGER(KIND=JPIM), INTENT(IN) :: IDX From 5ba6dbff07dc2d728b53261e6fcfdb5e42e735c5 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 25 Jan 2023 14:57:11 +0000 Subject: [PATCH 005/174] Updated description of field_api in README --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index a9e9d59b..49b5c09e 100644 --- a/README.md +++ b/README.md @@ -76,12 +76,12 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) C version of CLOUDSC including loop fusion and temporary local array demotion. - **dwarf-cloudsc-gpu-scc-field**: GPU-enabled and optimized version of - CLOUDSC that uses the SCC loop layout, and a dedicated Fortran FIELD - API to manage device offload and copyback. The intent is to demonstrate - the explicit use of pinned host memory to speed-up data transfers, as - provided by the shipped prototype implmentation, and investigate the - effect of different data storage allocation layouts. To enable this - variant, a suitable CUDA installation is required and the + CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://git.ecmwf.int/projects/RDX/repos/field_api/browse) (a Fortran library purpose-built for IFS data-structures that facilitates the + creation and management of field objects in scientific code) to perform device offload + and copyback. The intent is to demonstrate the explicit use of pinned host memory to speed-up + data transfers, as provided by the shipped prototype implmentation, and + investigate the effect of different data storage allocation layouts. + To enable this variant, a suitable CUDA installation is required and the `--with-cuda` flag needs to be passed at the build stage. ## Download and Installation From 209f437703810ea645b689e4fe087e06dbab8f80 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 Feb 2023 15:36:48 +0000 Subject: [PATCH 006/174] Remove redundant FIELD_API option from bundle --- bundle.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bundle.yml b/bundle.yml index b648c75c..8da185e3 100644 --- a/bundle.yml +++ b/bundle.yml @@ -107,10 +107,6 @@ options : help : Frontend parser to use for Loki transformations cmake : LOKI_FRONTEND={{value}} - - field_api-old-copy : - help : Use COPY for GPU offload rather than UPDATE_DEVICE/UPDATE_HOST - cmake : ENABLE_FIELD_API_OLD_COPY=ON - - field_api-testing : help : Enable field_api testing cmake : ENABLE_FIELD_API_TESTS=ON From 426fa57ebe7fe729ceefdc87a539160a9dacfc80 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 1 Mar 2023 13:34:32 +0000 Subject: [PATCH 007/174] Remove redundant -Mcuda link options --- src/cloudsc_gpu/CMakeLists.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt index a6c2e4cf..25703d1b 100644 --- a/src/cloudsc_gpu/CMakeLists.txt +++ b/src/cloudsc_gpu/CMakeLists.txt @@ -172,10 +172,6 @@ if( HAVE_CLOUDSC_GPU_SCC_CUF ) DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_CUF ) - # Small hack around the fact that CMake does not understand - # CUDA-Fortran natively yet. So we simply force linking here. - target_link_options(dwarf-cloudsc-gpu-scc-cuf PUBLIC "-Mcuda") - ecbuild_add_test( TARGET dwarf-cloudsc-gpu-scc-cuf-serial COMMAND bin/dwarf-cloudsc-gpu-scc-cuf @@ -204,10 +200,6 @@ if ( HAVE_CLOUDSC_GPU_SCC_CUF_K_CACHING ) DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_CUF_K_CACHING ) - # Small hack around the fact that CMake does not understand - # CUDA-Fortran natively yet. So we simply force linking here. - target_link_options(dwarf-cloudsc-gpu-scc-cuf-k-caching PUBLIC "-Mcuda") - ecbuild_add_test( TARGET dwarf-cloudsc-gpu-scc-cuf-k-caching-serial COMMAND bin/dwarf-cloudsc-gpu-scc-cuf-k-caching From bb45cd12c9aebb5384cbcfb02074257fd85b381a Mon Sep 17 00:00:00 2001 From: Zbigniew Piotrowski Date: Tue, 14 Mar 2023 13:34:41 +0000 Subject: [PATCH 008/174] Remove obsolete architectures, add march=znver2 option to gnu compilation --- arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake | 1 + arch/ecmwf/volta/nvhpc/20.9/env.sh | 57 ------- arch/ecmwf/volta/nvhpc/20.9/toolchain.cmake | 1 - arch/ecmwf/volta/nvhpc/22.3/env.sh | 54 ------- arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake | 61 ------- arch/ecmwf/xc40/cray/8.7.7/env.sh | 61 ------- arch/ecmwf/xc40/cray/8.7.7/toolchain.cmake | 1 - arch/ecmwf/xc40/intel/18.0.0/env.sh | 70 -------- arch/ecmwf/xc40/intel/18.0.0/toolchain.cmake | 1 - arch/toolchains/ecmwf-volta-pgi-gpu.cmake | 63 -------- arch/toolchains/ecmwf-volta-pgi-host.cmake | 49 ------ arch/toolchains/ecmwf-xc40-cray.cmake | 142 ----------------- arch/toolchains/ecmwf-xc40-intel.cmake | 149 ------------------ 13 files changed, 1 insertion(+), 709 deletions(-) delete mode 100644 arch/ecmwf/volta/nvhpc/20.9/env.sh delete mode 120000 arch/ecmwf/volta/nvhpc/20.9/toolchain.cmake delete mode 100644 arch/ecmwf/volta/nvhpc/22.3/env.sh delete mode 100644 arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake delete mode 100644 arch/ecmwf/xc40/cray/8.7.7/env.sh delete mode 120000 arch/ecmwf/xc40/cray/8.7.7/toolchain.cmake delete mode 100644 arch/ecmwf/xc40/intel/18.0.0/env.sh delete mode 120000 arch/ecmwf/xc40/intel/18.0.0/toolchain.cmake delete mode 100644 arch/toolchains/ecmwf-volta-pgi-gpu.cmake delete mode 100644 arch/toolchains/ecmwf-volta-pgi-host.cmake delete mode 100644 arch/toolchains/ecmwf-xc40-cray.cmake delete mode 100644 arch/toolchains/ecmwf-xc40-intel.cmake diff --git a/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake b/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake index 10709fc3..41a6ceb0 100644 --- a/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake +++ b/arch/ecmwf/hpc2020/gnu/11.2.0/toolchain.cmake @@ -36,6 +36,7 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fbacktrace") set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-second-underscore") set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ffast-math") set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fno-unsafe-math-optimizations") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=znver2") # This is dangerous! But GNU 10+ complains about argument mismatch for MPI routines set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fallow-argument-mismatch") diff --git a/arch/ecmwf/volta/nvhpc/20.9/env.sh b/arch/ecmwf/volta/nvhpc/20.9/env.sh deleted file mode 100644 index e8231d59..00000000 --- a/arch/ecmwf/volta/nvhpc/20.9/env.sh +++ /dev/null @@ -1,57 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module_unload boost -module_unload cmake -module_unload intel -module_unload pgi -module_unload nvhpc -module_unload nvhpc-nompi -module_unload gnu - -# Load modules -module use /opt/nvidia/hpc_sdk/modulefiles -# module load nvhpc -module_load nvhpc-nompi/20.9 -module_load boost/1.61.0 -module_load cmake/3.19.5 - -set -x - -# Increase stack size to maximum -ulimit -S -s unlimited - -# Fix boost header location -export BOOST_INCLUDEDIR="/usr/local/apps/boost/1.61.0/PGI/17.1/include/" - -# Include local OpenMPI in the path for discovery in build -export PATH="/local/hdd/nabr/openmpi/nvhpc-nompi/20.9/bin:$PATH" - -# Custom HDF5 library build with F03 interfaces -export HDF5_ROOT="/local/hdd/nabr/hdf5/nvhpc/20.9" - -# Restore tracing to stored setting -if [[ -n "$tracing_" ]]; then set -x; else set +x; fi - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" -export ANT_OPTS="-Dhttp.proxyHost=proxy.ecmwf.int -Dhttp.proxyPort=3333 -Dhttps.proxyHost=proxy.ecmwf.int -Dhttps.proxyPort=3333" diff --git a/arch/ecmwf/volta/nvhpc/20.9/toolchain.cmake b/arch/ecmwf/volta/nvhpc/20.9/toolchain.cmake deleted file mode 120000 index cce19996..00000000 --- a/arch/ecmwf/volta/nvhpc/20.9/toolchain.cmake +++ /dev/null @@ -1 +0,0 @@ -../../../../toolchains/ecmwf-volta-pgi-gpu.cmake \ No newline at end of file diff --git a/arch/ecmwf/volta/nvhpc/22.3/env.sh b/arch/ecmwf/volta/nvhpc/22.3/env.sh deleted file mode 100644 index 2663d507..00000000 --- a/arch/ecmwf/volta/nvhpc/22.3/env.sh +++ /dev/null @@ -1,54 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - echo "+ module load $1" - module load $1 -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module_unload boost -module_unload cmake -module_unload intel -module_unload pgi -module_unload nvhpc-nompi -module_unload nvhpc -module_unload gnu - -# Load modules -module use /local/hdd/daom/hpc_sdk_22.3/modulefiles/ -module_load nvhpc/22.3 -#module_load nvhpc-nompi/22.3 -module_load boost/1.61.0 -module_load cmake/3.19.5 - -set -x - -# Increase stack size to maximum -ulimit -S -s unlimited - -# Fix boost header location -export BOOST_INCLUDEDIR="/usr/local/apps/boost/1.61.0/PGI/17.1/include/" - -# Custom HDF5 library build with F03 interfaces -export HDF5_ROOT="/local/hdd/nabr/hdf5/nvhpc/22.3" - -# Restore tracing to stored setting -if [[ -n "$tracing_" ]]; then set -x; else set +x; fi - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" -export ANT_OPTS="-Dhttp.proxyHost=proxy.ecmwf.int -Dhttp.proxyPort=3333 -Dhttps.proxyHost=proxy.ecmwf.int -Dhttps.proxyPort=3333" diff --git a/arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake b/arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake deleted file mode 100644 index 96359878..00000000 --- a/arch/ecmwf/volta/nvhpc/22.3/toolchain.cmake +++ /dev/null @@ -1,61 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module -# unless its stored as a cache variable -set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) - -# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to -# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone -# doesn't have any effect here as the module uses FORCE to overwrite the -# existing value) -set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) -set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -# NB: We have to add `-mp` again to avoid undefined symbols during linking -# (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc70,lineinfo,fastmath" CACHE STRING "" ) -# Enable this to get more detailed compiler output -# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "-fpic") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") - -set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) - -set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) - -set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) - -# Fix for C++ template headers needed for Serialbox -set( GNU_HEADER_INCLUDE "-I/usr/local/apps/gcc/7.3.0/lib/gcc/x86_64-linux-gnu/7.3.0/include-fixed" ) -set( ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} ${GNU_HEADER_INCLUDE}" ) diff --git a/arch/ecmwf/xc40/cray/8.7.7/env.sh b/arch/ecmwf/xc40/cray/8.7.7/env.sh deleted file mode 100644 index b38d2762..00000000 --- a/arch/ecmwf/xc40/cray/8.7.7/env.sh +++ /dev/null @@ -1,61 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - if [ "$2" == "ECBUILD_CONFIGURE_ONLY" ]; then - if [ -n "${ECBUILD_CONFIGURE}" ]; then - echo "+ module load $1" - module load $1 - else - echo " WARNING: Module $1 not loaded (only during configuration)" - fi - else - echo "+ module load $1" - module load $1 - fi -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module_unload cmake -module_unload boost -module_unload ecbuild -module_unload cdt -module_unload python -module_unload python3 - -export EC_CRAYPE_INTEGRATION=off - -# Load modules -module_load cdt/18.12 -module_load gcc/6.3.0 -module_load boost/1.61.0 -module_load ninja -module_load cmake/3.15.0 -module_load python/2.7.12-01 -module_load python3/3.6.8-01 - -set -x - -export CRAY_ADD_RPATH=yes - -# This is used to download binary test data -export http_proxy="http://slb-proxy-web.ecmwf.int:3333/" - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/ecmwf/xc40/cray/8.7.7/toolchain.cmake b/arch/ecmwf/xc40/cray/8.7.7/toolchain.cmake deleted file mode 120000 index 2883f014..00000000 --- a/arch/ecmwf/xc40/cray/8.7.7/toolchain.cmake +++ /dev/null @@ -1 +0,0 @@ -../../../../toolchains/ecmwf-xc40-cray.cmake \ No newline at end of file diff --git a/arch/ecmwf/xc40/intel/18.0.0/env.sh b/arch/ecmwf/xc40/intel/18.0.0/env.sh deleted file mode 100644 index 9eb5b380..00000000 --- a/arch/ecmwf/xc40/intel/18.0.0/env.sh +++ /dev/null @@ -1,70 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -# Source me to get the correct configure/build/run environment - -# Store tracing and disable (module is *way* too verbose) -{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null - -module_load() { - if [ "$2" == "ECBUILD_CONFIGURE_ONLY" ]; then - if [ -n "${ECBUILD_CONFIGURE}" ]; then - echo "+ module load $1" - module load $1 - else - echo " WARNING: Module $1 not loaded (only during configuration)" - fi - else - echo "+ module load $1" - module load $1 - fi -} -module_unload() { - echo "+ module unload $1" - module unload $1 -} - -# Unload to be certain -module_unload cmake -module_unload python -module_unload python3 -module_unload boost -module_unload ecbuild -module_unload ifs-support -module_unload cdt -module_unload boost -module_unload PrgEnv-cray -module_unload PrgEnv-intel -module_unload intel -module_unload gcc - -export EC_CRAYPE_INTEGRATION=off - -# Load modules -module load gcc -module_load PrgEnv-intel/5.2.82 -module_unload intel -module_load intel/18.0.0.033 -module_load python/2.7.12-01 -module_load python3/3.6.8-01 -module_load boost/1.61.0 -module_load cray-snplauncher -module_load atp -module_load ninja -module_load cmake/3.15.0 -module_load boost/1.61.0 - -set -x - -# This is used to download binary test data -export http_proxy="http://slb-proxy-web.ecmwf.int:3333/" - -# Restore tracing to stored setting -{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null - -export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/ecmwf/xc40/intel/18.0.0/toolchain.cmake b/arch/ecmwf/xc40/intel/18.0.0/toolchain.cmake deleted file mode 120000 index 8195a573..00000000 --- a/arch/ecmwf/xc40/intel/18.0.0/toolchain.cmake +++ /dev/null @@ -1 +0,0 @@ -../../../../toolchains/ecmwf-xc40-intel.cmake \ No newline at end of file diff --git a/arch/toolchains/ecmwf-volta-pgi-gpu.cmake b/arch/toolchains/ecmwf-volta-pgi-gpu.cmake deleted file mode 100644 index df80087a..00000000 --- a/arch/toolchains/ecmwf-volta-pgi-gpu.cmake +++ /dev/null @@ -1,63 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module -# unless its stored as a cache variable -set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) - -# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to -# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone -# doesn't have any effect here as the module uses FORCE to overwrite the -# existing value) -set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) -set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") - -#################################################################### -# OpenAcc FLAGS -#################################################################### - -# NB: We have to add `-mp` again to avoid undefined symbols during linking -# (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc70,lineinfo,fastmath" CACHE STRING "" ) -# Enable this to get more detailed compiler output -# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "-fpic") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") - -set(ECBUILD_Fortran_LINK_FLAGS "-gpu=pinned") - -set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) - -set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) - -set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) - -# Fix for C++ template headers needed for Serialbox -set( GNU_HEADER_INCLUDE "-I/usr/local/apps/gcc/7.3.0/lib/gcc/x86_64-linux-gnu/7.3.0/include-fixed" ) -set( ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} ${GNU_HEADER_INCLUDE}" ) diff --git a/arch/toolchains/ecmwf-volta-pgi-host.cmake b/arch/toolchains/ecmwf-volta-pgi-host.cmake deleted file mode 100644 index 2224ba45..00000000 --- a/arch/toolchains/ecmwf-volta-pgi-host.cmake +++ /dev/null @@ -1,49 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - -#################################################################### -# OpenMP FLAGS -#################################################################### - -set( OMP_C_FLAGS "-mp -mp=bind,allcores,numa" ) -set( OMP_CXX_FLAGS "-mp -mp=bind,allcores,numa" ) -set( OMP_Fortran_FLAGS "-mp -mp=bind,allcores,numa" ) - -#################################################################### -# COMMON FLAGS -#################################################################### - -set(ECBUILD_Fortran_FLAGS "-O2 -g ${OMP_Fortran_FLAGS} -fpic") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") - -set( ECBUILD_C_FLAGS "-O2 -gopt ${OMP_C_FLAGS} -traceback" ) - -set( ECBUILD_CXX_FLAGS "-O2 -gopt ${OMP_CXX_FLAGS}" ) - -# Fix for C++ template headers needed for Serialbox -set( GNU_HEADER_INCLUDE "-I/usr/local/apps/gcc/7.3.0/lib/gcc/x86_64-linux-gnu/7.3.0/include-fixed" ) -set( ECBUILD_CXX_FLAGS "${ECBUILD_CXX_FLAGS} ${GNU_HEADER_INCLUDE}" ) - -#################################################################### -# LINK FLAGS -#################################################################### - -set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--as-needed -Wl,-export-dynamic" ) -set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--as-needed -Wl,-export-dynamic" ) diff --git a/arch/toolchains/ecmwf-xc40-cray.cmake b/arch/toolchains/ecmwf-xc40-cray.cmake deleted file mode 100644 index d1db1220..00000000 --- a/arch/toolchains/ecmwf-xc40-cray.cmake +++ /dev/null @@ -1,142 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# ARCHITECTURE -#################################################################### - -set( EC_HAVE_C_INLINE 1 ) -set( EC_HAVE_FUNCTION_DEF 1 ) -set( EC_HAVE_CXXABI_H 1 ) -set( EC_HAVE_CXX_BOOL 1 ) -set( EC_HAVE_CXX_SSTREAM 1 ) -set( EC_HAVE_CXX_INT_128 0 ) -set( CMAKE_SIZEOF_VOID_P 8 ) -set( EC_SIZEOF_PTR 8 ) -set( EC_SIZEOF_CHAR 1 ) -set( EC_SIZEOF_SHORT 2 ) -set( EC_SIZEOF_INT 4 ) -set( EC_SIZEOF_LONG 8 ) -set( EC_SIZEOF_LONG_LONG 8 ) -set( EC_SIZEOF_FLOAT 4 ) -set( EC_SIZEOF_DOUBLE 8 ) -set( EC_SIZEOF_LONG_DOUBLE 8 ) -set( EC_SIZEOF_SIZE_T 8 ) -set( EC_SIZEOF_SSIZE_T 8 ) -set( EC_SIZEOF_OFF_T 8 ) -set( EC_BIG_ENDIAN 0 ) -set( EC_LITTLE_ENDIAN 1 ) -set( IEEE_BE 0 ) -set( IEEE_LE 1 ) -set( EC_HAVE_FSEEK 1 ) -set( EC_HAVE_FSEEKO 1 ) -set( EC_HAVE_FTELLO 1 ) -set( EC_HAVE_LSEEK 0 ) -set( EC_HAVE_FTRUNCATE 0 ) -set( EC_HAVE_OPEN 0 ) -set( EC_HAVE_FOPEN 1 ) -set( EC_HAVE_FMEMOPEN 1 ) -set( EC_HAVE_FUNOPEN 0 ) -set( EC_HAVE_FLOCK 1 ) -set( EC_HAVE_MMAP 1 ) -set( EC_HAVE_POSIX_MEMALIGN 1 ) -set( EC_HAVE_F_GETLK 1 ) -set( EC_HAVE_F_SETLK 1 ) -set( EC_HAVE_F_SETLKW 1 ) -set( EC_HAVE_F_GETLK64 1 ) -set( EC_HAVE_F_SETLK64 1 ) -set( EC_HAVE_F_SETLKW64 1 ) -set( EC_HAVE_MAP_ANONYMOUS 1 ) -set( EC_HAVE_MAP_ANON 1 ) -set( EC_HAVE_ASSERT_H 1 ) -set( EC_HAVE_STDLIB_H 1 ) -set( EC_HAVE_UNISTD_H 1 ) -set( EC_HAVE_STRING_H 1 ) -set( EC_HAVE_STRINGS_H 1 ) -set( EC_HAVE_SYS_STAT_H 1 ) -set( EC_HAVE_SYS_TIME_H 1 ) -set( EC_HAVE_SYS_TYPES_H 1 ) -set( EC_HAVE_MALLOC_H 1 ) -set( EC_HAVE_SYS_MALLOC_H 0 ) -set( EC_HAVE_SYS_PARAM_H 1 ) -set( EC_HAVE_SYS_MOUNT_H 1 ) -set( EC_HAVE_SYS_VFS_H 1 ) -set( EC_HAVE_OFFT 1 ) -set( EC_HAVE_OFF64T 1 ) -set( EC_HAVE_STRUCT_STAT 1 ) -set( EC_HAVE_STRUCT_STAT64 1 ) -set( EC_HAVE_STAT 1 ) -set( EC_HAVE_STAT64 1 ) -set( EC_HAVE_FSTAT 1 ) -set( EC_HAVE_FSTAT64 1 ) -set( EC_HAVE_FSEEKO64 1 ) -set( EC_HAVE_FTELLO64 1 ) -set( EC_HAVE_LSEEK64 1 ) -set( EC_HAVE_OPEN64 1 ) -set( EC_HAVE_FOPEN64 1 ) -set( EC_HAVE_FTRUNCATE64 1 ) -set( EC_HAVE_FLOCK64 1 ) -set( EC_HAVE_MMAP64 1 ) -set( EC_HAVE_STRUCT_STATVFS 1 ) -set( EC_HAVE_STRUCT_STATVFS64 1 ) -set( EC_HAVE_FOPENCOOKIE 1 ) -set( EC_HAVE_FSYNC 1 ) -set( EC_HAVE_FDATASYNC 1 ) -set( EC_HAVE_DIRFD 1 ) -set( EC_HAVE_SYSPROC 0 ) -set( EC_HAVE_SYSPROCFS 1 ) -set( EC_HAVE_EXECINFO_BACKTRACE 1 ) -set( EC_HAVE_GMTIME_R 1 ) -set( EC_HAVE_GETPWUID_R 1 ) -set( EC_HAVE_GETPWNAM_R 1 ) -set( EC_HAVE_READDIR_R 1 ) -set( EC_HAVE_DIRENT_D_TYPE 1 ) -set( EC_HAVE_GETHOSTBYNAME_R 1 ) -set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 ) -set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 ) -set( EC_HAVE_PROCFS 1 ) -set( EC_HAVE_DLFCN_H 1 ) -set( EC_HAVE_DLADDR 1 ) -set( EC_HAVE_AIOCB 1 ) -set( EC_HAVE_AIOCB64 1 ) - -# Disable relative rpaths as aprun does not respect it -set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE ) - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI OFF ) -set( ECBUILD_TRUST_FLAGS ON ) - -#################################################################### -# Compiler FLAGS -#################################################################### - -# General Flags (add to default) - -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hcontiguous") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") - -#################################################################### -# LINK FLAGS -#################################################################### - -if( EXISTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" ) - set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" ) -elseif( EXISTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" ) - set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" ) -endif() - -set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp" ) -set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap" ) -set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap -Wl,--as-needed" ) -set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" ) diff --git a/arch/toolchains/ecmwf-xc40-intel.cmake b/arch/toolchains/ecmwf-xc40-intel.cmake deleted file mode 100644 index 3bdf3456..00000000 --- a/arch/toolchains/ecmwf-xc40-intel.cmake +++ /dev/null @@ -1,149 +0,0 @@ -# (C) Copyright 1988- ECMWF. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#################################################################### -# ARCHITECTURE -#################################################################### - -set( EC_HAVE_C_INLINE 1 ) -set( EC_HAVE_FUNCTION_DEF 1 ) -set( EC_HAVE_CXXABI_H 1 ) -set( EC_HAVE_CXX_BOOL 1 ) -set( EC_HAVE_CXX_SSTREAM 1 ) -set( EC_HAVE_CXX_INT_128 0 ) -set( CMAKE_SIZEOF_VOID_P 8 ) -set( EC_SIZEOF_PTR 8 ) -set( EC_SIZEOF_CHAR 1 ) -set( EC_SIZEOF_SHORT 2 ) -set( EC_SIZEOF_INT 4 ) -set( EC_SIZEOF_LONG 8 ) -set( EC_SIZEOF_LONG_LONG 8 ) -set( EC_SIZEOF_FLOAT 4 ) -set( EC_SIZEOF_DOUBLE 8 ) -set( EC_SIZEOF_LONG_DOUBLE 8 ) -set( EC_SIZEOF_SIZE_T 8 ) -set( EC_SIZEOF_SSIZE_T 8 ) -set( EC_SIZEOF_OFF_T 8 ) -set( EC_BIG_ENDIAN 0 ) -set( EC_LITTLE_ENDIAN 1 ) -set( IEEE_BE 0 ) -set( IEEE_LE 1 ) -set( EC_HAVE_FSEEK 1 ) -set( EC_HAVE_FSEEKO 1 ) -set( EC_HAVE_FTELLO 1 ) -set( EC_HAVE_LSEEK 0 ) -set( EC_HAVE_FTRUNCATE 0 ) -set( EC_HAVE_OPEN 0 ) -set( EC_HAVE_FOPEN 1 ) -set( EC_HAVE_FMEMOPEN 1 ) -set( EC_HAVE_FUNOPEN 0 ) -set( EC_HAVE_FLOCK 1 ) -set( EC_HAVE_MMAP 1 ) -set( EC_HAVE_POSIX_MEMALIGN 1 ) -set( EC_HAVE_F_GETLK 1 ) -set( EC_HAVE_F_SETLK 1 ) -set( EC_HAVE_F_SETLKW 1 ) -set( EC_HAVE_F_GETLK64 1 ) -set( EC_HAVE_F_SETLK64 1 ) -set( EC_HAVE_F_SETLKW64 1 ) -set( EC_HAVE_MAP_ANONYMOUS 1 ) -set( EC_HAVE_MAP_ANON 1 ) -set( EC_HAVE_ASSERT_H 1 ) -set( EC_HAVE_STDLIB_H 1 ) -set( EC_HAVE_UNISTD_H 1 ) -set( EC_HAVE_STRING_H 1 ) -set( EC_HAVE_STRINGS_H 1 ) -set( EC_HAVE_SYS_STAT_H 1 ) -set( EC_HAVE_SYS_TIME_H 1 ) -set( EC_HAVE_SYS_TYPES_H 1 ) -set( EC_HAVE_MALLOC_H 1 ) -set( EC_HAVE_SYS_MALLOC_H 0 ) -set( EC_HAVE_SYS_PARAM_H 1 ) -set( EC_HAVE_SYS_MOUNT_H 1 ) -set( EC_HAVE_SYS_VFS_H 1 ) -set( EC_HAVE_OFFT 1 ) -set( EC_HAVE_OFF64T 1 ) -set( EC_HAVE_STRUCT_STAT 1 ) -set( EC_HAVE_STRUCT_STAT64 1 ) -set( EC_HAVE_STAT 1 ) -set( EC_HAVE_STAT64 1 ) -set( EC_HAVE_FSTAT 1 ) -set( EC_HAVE_FSTAT64 1 ) -set( EC_HAVE_FSEEKO64 1 ) -set( EC_HAVE_FTELLO64 1 ) -set( EC_HAVE_LSEEK64 1 ) -set( EC_HAVE_OPEN64 1 ) -set( EC_HAVE_FOPEN64 1 ) -set( EC_HAVE_FTRUNCATE64 1 ) -set( EC_HAVE_FLOCK64 1 ) -set( EC_HAVE_MMAP64 1 ) -set( EC_HAVE_STRUCT_STATVFS 1 ) -set( EC_HAVE_STRUCT_STATVFS64 1 ) -set( EC_HAVE_FOPENCOOKIE 1 ) -set( EC_HAVE_FSYNC 1 ) -set( EC_HAVE_FDATASYNC 1 ) -set( EC_HAVE_DIRFD 1 ) -set( EC_HAVE_SYSPROC 0 ) -set( EC_HAVE_SYSPROCFS 1 ) -set( EC_HAVE_EXECINFO_BACKTRACE 1 ) -set( EC_HAVE_GMTIME_R 1 ) -set( EC_HAVE_GETPWUID_R 1 ) -set( EC_HAVE_GETPWNAM_R 1 ) -set( EC_HAVE_READDIR_R 1 ) -set( EC_HAVE_DIRENT_D_TYPE 1 ) -set( EC_HAVE_GETHOSTBYNAME_R 1 ) -set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 ) -set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 ) -set( EC_HAVE_PROCFS 1 ) -set( EC_HAVE_DLFCN_H 1 ) -set( EC_HAVE_DLADDR 1 ) -set( EC_HAVE_AIOCB 1 ) -set( EC_HAVE_AIOCB64 1 ) - -# Disable relative rpaths as aprun does not respect it -set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE ) - -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI OFF ) -set( ECBUILD_TRUST_FLAGS ON ) - -#################################################################### -# Compiler FLAGS -#################################################################### - -# General Flags (add to default) - -set(ECBUILD_Fortran_FLAGS "-g") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -qopenmp-threadprivate compat") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume byterecl") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -xHost") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-limit=500") -set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline") - -#################################################################### -# LINK FLAGS -#################################################################### - -if( EXISTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" ) - set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/x86-64/libcray-c++-rts.so" ) -elseif( EXISTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" ) - set( LIBCRAY_CXX_RTS "$ENV{CC_X86_64}/lib/libcray-c++-rts.so" ) -endif() - -set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp" ) -set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap" ) -set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Ktrap=fp -Wl,-Map,loadmap -Wl,--as-needed" ) -set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" ) From fd769499e1bb4d0f27594f269bab2b87d10adede Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 15 Mar 2023 11:20:40 +0100 Subject: [PATCH 009/174] add Atlas data structures to be used in a separate exe: cloudsc-fortran-atlas --- CMakeLists.txt | 1 + src/CMakeLists.txt | 1 + src/cloudsc_fortran_atlas/CMakeLists.txt | 77 + src/cloudsc_fortran_atlas/cloudsc.F90 | 2867 +++++++++++++++++ .../cloudsc_driver_mod.F90 | 210 ++ .../cloudsc_global_atlas_state_mod.F90 | 313 ++ .../dwarf_cloudsc_atlas.F90 | 105 + .../expand_atlas_mod.F90 | 151 + .../validate_atlas_mod.F90 | 182 ++ 9 files changed, 3907 insertions(+) create mode 100644 src/cloudsc_fortran_atlas/CMakeLists.txt create mode 100644 src/cloudsc_fortran_atlas/cloudsc.F90 create mode 100644 src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 create mode 100644 src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 create mode 100644 src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 create mode 100644 src/cloudsc_fortran_atlas/expand_atlas_mod.F90 create mode 100644 src/cloudsc_fortran_atlas/validate_atlas_mod.F90 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9eafd77e..04939da8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,7 @@ if( HAVE_SERIALBOX ) endif() ecbuild_find_package( NAME loki ) +ecbuild_find_package( NAME atlas ) # Add option for single-precision builds ecbuild_add_option( FEATURE SINGLE_PRECISION diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b0a1c105..eac0dbb8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(prototype1) add_subdirectory(common) add_subdirectory(cloudsc_fortran) +add_subdirectory(cloudsc_fortran_atlas) add_subdirectory(cloudsc_c) add_subdirectory(cloudsc_cuda) add_subdirectory(cloudsc_gpu) diff --git a/src/cloudsc_fortran_atlas/CMakeLists.txt b/src/cloudsc_fortran_atlas/CMakeLists.txt new file mode 100644 index 00000000..dc21c6ae --- /dev/null +++ b/src/cloudsc_fortran_atlas/CMakeLists.txt @@ -0,0 +1,77 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Define this dwarf variant as an ECBuild feature +ecbuild_add_option( FEATURE CLOUDSC_FORTRAN_ATLAS + DESCRIPTION "Build the Fortran version CLOUDSC using Atlas and Serialbox" DEFAULT ON + CONDITION atlas_FOUND AND (Serialbox_FOUND OR HDF5_FOUND) +) + +if( HAVE_CLOUDSC_FORTRAN_ATLAS ) + ecbuild_add_executable( + TARGET dwarf-cloudsc-fortran-atlas + SOURCES + cloudsc_global_atlas_state_mod.F90 + expand_atlas_mod.F90 + validate_atlas_mod.F90 + cloudsc_driver_mod.F90 + cloudsc.F90 + dwarf_cloudsc_atlas.F90 + LIBS + cloudsc-common-lib + atlas_f + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) +endif() + + # Create symlink for the input data + if( HAVE_SERIALBOX ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + endif() + + if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + endif() + + ecbuild_add_test( + TARGET dwarf-cloudsc-fortran-atlas-serial + COMMAND bin/dwarf-cloudsc-atlas-fortran + ARGS 1 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-fortran-atlas-omp + COMMAND bin/dwarf-cloudsc-fortran-atlas + ARGS 4 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 4 + CONDITION HAVE_OMP + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-fortran-atlas-mpi + COMMAND bin/dwarf-cloudsc-fortran-atlas + ARGS 1 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + MPI 2 + OMP 1 + CONDITION HAVE_MPI + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-fortran-mpi-atlas-omp + COMMAND bin/dwarf-cloudsc-fortran-atlas + ARGS 4 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + MPI 2 + OMP 4 + CONDITION HAVE_OMP AND HAVE_MPI + ) diff --git a/src/cloudsc_fortran_atlas/cloudsc.F90 b/src/cloudsc_fortran_atlas/cloudsc.F90 new file mode 100644 index 00000000..0c712de4 --- /dev/null +++ b/src/cloudsc_fortran_atlas/cloudsc.F90 @@ -0,0 +1,2867 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +SUBROUTINE CLOUDSC & + !---input + & (KIDIA, KFDIA, KLON, KLEV,& + & PTSPHY,& + & PT, PQ, tendency_cml,tendency_tmp,tendency_loc, & + & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, & + & PHRSW, PHRLW,& + & PVERVEL, PAP, PAPH,& + & PLSM, LDCUM, KTYPE, & + & PLU, PLUDE, PSNDE, PMFU, PMFD,& + !---prognostic fields + & PA,& + & PCLV, & + & PSUPSAT,& +!-- arrays for aerosol-cloud interactions +!!! & PQAER, KAER, & + & PLCRIT_AER,PICRIT_AER,& + & PRE_ICE,& + & PCCN, PNICE,& + !---diagnostic output + & PCOVPTOT, PRAINFRAC_TOPRFZ,& + !---resulting fluxes + & PFSQLF, PFSQIF , PFCQNNG, PFCQLNG,& + & PFSQRF, PFSQSF , PFCQRNG, PFCQSNG,& + & PFSQLTUR, PFSQITUR , & + & PFPLSL, PFPLSN, PFHPSL, PFHPSN,& + & KFLDX) + +!=============================================================================== +!**** *CLOUDSC* - ROUTINE FOR PARAMATERIZATION OF CLOUD PROCESSES +! FOR PROGNOSTIC CLOUD SCHEME +!! +! M.Tiedtke, C.Jakob, A.Tompkins, R.Forbes (E.C.M.W.F.) +!! +! PURPOSE +! ------- +! THIS ROUTINE UPDATES THE CONV/STRAT CLOUD FIELDS. +! THE FOLLOWING PROCESSES ARE CONSIDERED: +! - Detrainment of cloud water from convective updrafts +! - Evaporation/condensation of cloud water in connection +! with heating/cooling such as by subsidence/ascent +! - Erosion of clouds by turbulent mixing of cloud air +! with unsaturated environmental air +! - Deposition onto ice when liquid water present (Bergeron-Findeison) +! - Conversion of cloud water into rain (collision-coalescence) +! - Conversion of cloud ice to snow (aggregation) +! - Sedimentation of rain, snow and ice +! - Evaporation of rain and snow +! - Melting of snow and ice +! - Freezing of liquid and rain +! Note: Turbulent transports of s,q,u,v at cloud tops due to +! buoyancy fluxes and lw radiative cooling are treated in +! the VDF scheme +!! +! INTERFACE. +! ---------- +! *CLOUDSC* IS CALLED FROM *CALLPAR* +! THE ROUTINE TAKES ITS INPUT FROM THE LONG-TERM STORAGE: +! T,Q,L,PHI AND DETRAINMENT OF CLOUD WATER FROM THE +! CONVECTIVE CLOUDS (MASSFLUX CONVECTION SCHEME), BOUNDARY +! LAYER TURBULENT FLUXES OF HEAT AND MOISTURE, RADIATIVE FLUXES, +! OMEGA. +! IT RETURNS ITS OUTPUT TO: +! 1.MODIFIED TENDENCIES OF MODEL VARIABLES T AND Q +! AS WELL AS CLOUD VARIABLES L AND C +! 2.GENERATES PRECIPITATION FLUXES FROM STRATIFORM CLOUDS +!! +! EXTERNALS. +! ---------- +! NONE +!! +! MODIFICATIONS. +! ------------- +! M. TIEDTKE E.C.M.W.F. 8/1988, 2/1990 +! CH. JAKOB E.C.M.W.F. 2/1994 IMPLEMENTATION INTO IFS +! A.TOMPKINS E.C.M.W.F. 2002 NEW NUMERICS +! 01-05-22 : D.Salmond Safety modifications +! 02-05-29 : D.Salmond Optimisation +! 03-01-13 : J.Hague MASS Vector Functions J.Hague +! 03-10-01 : M.Hamrud Cleaning +! 04-12-14 : A.Tompkins New implicit solver and physics changes +! 04-12-03 : A.Tompkins & M.Ko"hler moist PBL +! G.Mozdzynski 09-Jan-2006 EXP security fix +! 19-01-09 : P.Bechtold Changed increased RCLDIFF value for KTYPE=2 +! 07-07-10 : A.Tompkins/R.Forbes 4-Phase flexible microphysics +! 01-03-11 : R.Forbes Mixed phase changes and tidy up +! 01-10-11 : R.Forbes Melt ice to rain, allow rain to freeze +! 01-10-11 : R.Forbes Limit supersat to avoid excessive values +! 31-10-11 : M.Ahlgrimm Add rain, snow and PEXTRA to DDH output +! 17-02-12 : F.Vana Simplified/optimized LU factorization +! 18-05-12 : F.Vana Cleaning + better support of sequential physics +! N.Semane+P.Bechtold 04-10-2012 Add RVRFACTOR factor for small planet +! 01-02-13 : R.Forbes New params of autoconv/acc,rain evap,snow riming +! 15-03-13 : F. Vana New dataflow + more tendencies from the first call +! K. Yessad (July 2014): Move some variables. +! F. Vana 05-Mar-2015 Support for single precision +! 15-01-15 : R.Forbes Added new options for snow evap & ice deposition +! 10-01-15 : R.Forbes New physics for rain freezing +! 23-10-14 : P. Bechtold remove zeroing of convection arrays +! +! SWITCHES. +! -------- +!! +! MODEL PARAMETERS +! ---------------- +! RCLDIFF: PARAMETER FOR EROSION OF CLOUDS +! RCLCRIT_SEA: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER SEA +! RCLCRIT_LAND: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER LAND +! RLCRITSNOW: THRESHOLD VALUE FOR SNOW AUTOCONVERSION +! RKCONV: PARAMETER FOR AUTOCONVERSION OF CLOUDS (KESSLER) +! RCLDMAX: MAXIMUM POSSIBLE CLW CONTENT (MASON,1971) +!! +! REFERENCES. +! ---------- +! TIEDTKE MWR 1993 +! JAKOB PhD 2000 +! GREGORY ET AL. QJRMS 2000 +! TOMPKINS ET AL. QJRMS 2007 +!! +!=============================================================================== + +USE PARKIND1 , ONLY : JPIM, JPRB +!USE YOMHOOK , ONLY : LHOOK, DR_HOOK +USE YOMCST , ONLY : RG, RD, RCPD, RETV, RLVTT, RLSTT, RLMLT, RTT, RV +USE YOETHF , ONLY : R2ES, R3LES, R3IES, R4LES, R4IES, R5LES, R5IES, & + & R5ALVCP, R5ALSCP, RALVDCP, RALSDCP, RALFDCP, RTWAT, RTICE, RTICECU, & + & RTWAT_RTICE_R, RTWAT_RTICECU_R, RKOOP1, RKOOP2 +USE YOECLDP , ONLY : YRECLDP, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, NCLV +USE YOMPHYDER ,ONLY : STATE_TYPE + +IMPLICIT NONE + +!------------------------------------------------------------------------------- +! Declare input/output arguments +!------------------------------------------------------------------------------- + +! PLCRIT_AER : critical liquid mmr for rain autoconversion process +! PICRIT_AER : critical liquid mmr for snow autoconversion process +! PRE_LIQ : liq Re +! PRE_ICE : ice Re +! PCCN : liquid cloud condensation nuclei +! PNICE : ice number concentration (cf. CCN) + +REAL(KIND=JPRB) ,INTENT(IN) :: PLCRIT_AER(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PICRIT_AER(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PRE_ICE(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PCCN(KLON,KLEV) ! liquid cloud condensation nuclei +REAL(KIND=JPRB) ,INTENT(IN) :: PNICE(KLON,KLEV) ! ice number concentration (cf. CCN) + +INTEGER(KIND=JPIM),INTENT(IN) :: KLON ! Number of grid points +INTEGER(KIND=JPIM),INTENT(IN) :: KLEV ! Number of levels +INTEGER(KIND=JPIM),INTENT(IN) :: KIDIA +INTEGER(KIND=JPIM),INTENT(IN) :: KFDIA +REAL(KIND=JPRB) ,INTENT(IN) :: PTSPHY ! Physics timestep +REAL(KIND=JPRB) ,INTENT(IN) :: PT(KLON,KLEV) ! T at start of callpar +REAL(KIND=JPRB) ,INTENT(IN) :: PQ(KLON,KLEV) ! Q at start of callpar +TYPE (STATE_TYPE) , INTENT (IN) :: tendency_cml ! cumulative tendency used for final output +TYPE (STATE_TYPE) , INTENT (IN) :: tendency_tmp ! cumulative tendency used as input +TYPE (STATE_TYPE) , INTENT (OUT) :: tendency_loc ! local tendency from cloud scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFA(KLON,KLEV) ! CC from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFL(KLON,KLEV) ! Liq from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFI(KLON,KLEV) ! Ice from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNA(KLON,KLEV) ! CC from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNL(KLON,KLEV) ! Liq from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNI(KLON,KLEV) ! Liq from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PHRSW(KLON,KLEV) ! Short-wave heating rate +REAL(KIND=JPRB) ,INTENT(IN) :: PHRLW(KLON,KLEV) ! Long-wave heating rate +REAL(KIND=JPRB) ,INTENT(IN) :: PVERVEL(KLON,KLEV) !Vertical velocity +REAL(KIND=JPRB) ,INTENT(IN) :: PAP(KLON,KLEV) ! Pressure on full levels +REAL(KIND=JPRB) ,INTENT(IN) :: PAPH(KLON,KLEV+1)! Pressure on half levels +REAL(KIND=JPRB) ,INTENT(IN) :: PLSM(KLON) ! Land fraction (0-1) +LOGICAL ,INTENT(IN) :: LDCUM(KLON) ! Convection active +INTEGER(KIND=JPIM),INTENT(IN) :: KTYPE(KLON) ! Convection type 0,1,2 +REAL(KIND=JPRB) ,INTENT(IN) :: PLU(KLON,KLEV) ! Conv. condensate +REAL(KIND=JPRB) ,INTENT(INOUT) :: PLUDE(KLON,KLEV) ! Conv. detrained water +REAL(KIND=JPRB) ,INTENT(IN) :: PSNDE(KLON,KLEV) ! Conv. detrained snow +REAL(KIND=JPRB) ,INTENT(IN) :: PMFU(KLON,KLEV) ! Conv. mass flux up +REAL(KIND=JPRB) ,INTENT(IN) :: PMFD(KLON,KLEV) ! Conv. mass flux down +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KLON,KLEV) ! Original Cloud fraction (t) + +INTEGER(KIND=JPIM),INTENT(IN) :: KFLDX + +REAL(KIND=JPRB) ,INTENT(IN) :: PCLV(KLON,KLEV,NCLV) + + ! Supersat clipped at previous time level in SLTEND +REAL(KIND=JPRB) ,INTENT(IN) :: PSUPSAT(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(OUT) :: PCOVPTOT(KLON,KLEV) ! Precip fraction +REAL(KIND=JPRB) ,INTENT(OUT) :: PRAINFRAC_TOPRFZ(KLON) +! Flux diagnostics for DDH budget +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQLF(KLON,KLEV+1) ! Flux of liquid +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQIF(KLON,KLEV+1) ! Flux of ice +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQLNG(KLON,KLEV+1) ! -ve corr for liq +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQNNG(KLON,KLEV+1) ! -ve corr for ice +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQRF(KLON,KLEV+1) ! Flux diagnostics +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQSF(KLON,KLEV+1) ! for DDH, generic +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQRNG(KLON,KLEV+1) ! rain +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQSNG(KLON,KLEV+1) ! snow +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQLTUR(KLON,KLEV+1) ! liquid flux due to VDF +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQITUR(KLON,KLEV+1) ! ice flux due to VDF +REAL(KIND=JPRB) ,INTENT(OUT) :: PFPLSL(KLON,KLEV+1) ! liq+rain sedim flux +REAL(KIND=JPRB) ,INTENT(OUT) :: PFPLSN(KLON,KLEV+1) ! ice+snow sedim flux +REAL(KIND=JPRB) ,INTENT(OUT) :: PFHPSL(KLON,KLEV+1) ! Enthalpy flux for liq +REAL(KIND=JPRB) ,INTENT(OUT) :: PFHPSN(KLON,KLEV+1) ! Enthalp flux for ice + +!------------------------------------------------------------------------------- +! Declare local variables +!------------------------------------------------------------------------------- + +REAL(KIND=JPRB) :: & +! condensation and evaporation terms + & ZLCOND1(KLON), ZLCOND2(KLON),& + & ZLEVAP, ZLEROS,& + & ZLEVAPL(KLON), ZLEVAPI(KLON),& +! autoconversion terms + & ZRAINAUT(KLON), ZSNOWAUT(KLON), & + & ZLIQCLD(KLON), ZICECLD(KLON) +REAL(KIND=JPRB) :: ZFOKOOP(KLON), ZFOEALFA(KLON,KLEV+1) +REAL(KIND=JPRB) :: ZICENUCLEI(KLON) ! number concentration of ice nuclei + +REAL(KIND=JPRB) :: ZLICLD(KLON) +REAL(KIND=JPRB) :: ZACOND +REAL(KIND=JPRB) :: ZAEROS +REAL(KIND=JPRB) :: ZLFINALSUM(KLON) +REAL(KIND=JPRB) :: ZDQS(KLON) +REAL(KIND=JPRB) :: ZTOLD(KLON) +REAL(KIND=JPRB) :: ZQOLD(KLON) +REAL(KIND=JPRB) :: ZDTGDP(KLON) +REAL(KIND=JPRB) :: ZRDTGDP(KLON) +REAL(KIND=JPRB) :: ZTRPAUS(KLON) +REAL(KIND=JPRB) :: ZCOVPCLR(KLON) +REAL(KIND=JPRB) :: ZPRECLR +REAL(KIND=JPRB) :: ZCOVPTOT(KLON) +REAL(KIND=JPRB) :: ZCOVPMAX(KLON) +REAL(KIND=JPRB) :: ZQPRETOT(KLON) +REAL(KIND=JPRB) :: ZDPEVAP +REAL(KIND=JPRB) :: ZDTFORC +REAL(KIND=JPRB) :: ZDTDIAB +REAL(KIND=JPRB) :: ZTP1(KLON,KLEV) +REAL(KIND=JPRB) :: ZLDEFR(KLON) +REAL(KIND=JPRB) :: ZLDIFDT(KLON) +REAL(KIND=JPRB) :: ZDTGDPF(KLON) +REAL(KIND=JPRB) :: ZLCUST(KLON,NCLV) +REAL(KIND=JPRB) :: ZACUST(KLON) +REAL(KIND=JPRB) :: ZMF(KLON) + +REAL(KIND=JPRB) :: ZRHO(KLON) +REAL(KIND=JPRB) :: ZTMP1(KLON),ZTMP2(KLON),ZTMP3(KLON) +REAL(KIND=JPRB) :: ZTMP4(KLON),ZTMP5(KLON),ZTMP6(KLON),ZTMP7(KLON) +REAL(KIND=JPRB) :: ZALFAWM(KLON) + +! Accumulators of A,B,and C factors for cloud equations +REAL(KIND=JPRB) :: ZSOLAB(KLON) ! -ve implicit CC +REAL(KIND=JPRB) :: ZSOLAC(KLON) ! linear CC +REAL(KIND=JPRB) :: ZANEW +REAL(KIND=JPRB) :: ZANEWM1(KLON) + +REAL(KIND=JPRB) :: ZGDP(KLON) + +!---for flux calculation +REAL(KIND=JPRB) :: ZDA(KLON) +REAL(KIND=JPRB) :: ZLI(KLON,KLEV), ZA(KLON,KLEV) +REAL(KIND=JPRB) :: ZAORIG(KLON,KLEV) ! start of scheme value for CC + +LOGICAL :: LLFLAG(KLON) +LOGICAL :: LLO1 + +INTEGER(KIND=JPIM) :: ICALL, IK, JK, JL, JM, JN, JO, JLEN, IS + +REAL(KIND=JPRB) :: ZDP(KLON), ZPAPHD(KLON) + +REAL(KIND=JPRB) :: ZALFA +! & ZALFACU, ZALFALS +REAL(KIND=JPRB) :: ZALFAW +REAL(KIND=JPRB) :: ZBETA,ZBETA1 +!REAL(KIND=JPRB) :: ZBOTT +REAL(KIND=JPRB) :: ZCFPR +REAL(KIND=JPRB) :: ZCOR +REAL(KIND=JPRB) :: ZCDMAX +REAL(KIND=JPRB) :: ZMIN(KLON) +REAL(KIND=JPRB) :: ZLCONDLIM +REAL(KIND=JPRB) :: ZDENOM +REAL(KIND=JPRB) :: ZDPMXDT +REAL(KIND=JPRB) :: ZDPR +REAL(KIND=JPRB) :: ZDTDP +REAL(KIND=JPRB) :: ZE +REAL(KIND=JPRB) :: ZEPSEC +REAL(KIND=JPRB) :: ZFAC, ZFACI, ZFACW +REAL(KIND=JPRB) :: ZGDCP +REAL(KIND=JPRB) :: ZINEW +REAL(KIND=JPRB) :: ZLCRIT +REAL(KIND=JPRB) :: ZMFDN +REAL(KIND=JPRB) :: ZPRECIP +REAL(KIND=JPRB) :: ZQE +REAL(KIND=JPRB) :: ZQSAT, ZQTMST, ZRDCP +REAL(KIND=JPRB) :: ZRHC, ZSIG, ZSIGK +REAL(KIND=JPRB) :: ZWTOT +REAL(KIND=JPRB) :: ZZCO, ZZDL, ZZRH, ZZZDT, ZQADJ +REAL(KIND=JPRB) :: ZQNEW, ZTNEW +REAL(KIND=JPRB) :: ZRG_R,ZGDPH_R,ZCONS1,ZCOND,ZCONS1A +REAL(KIND=JPRB) :: ZLFINAL +REAL(KIND=JPRB) :: ZMELT +REAL(KIND=JPRB) :: ZEVAP +REAL(KIND=JPRB) :: ZFRZ +REAL(KIND=JPRB) :: ZVPLIQ, ZVPICE +REAL(KIND=JPRB) :: ZADD, ZBDD, ZCVDS, ZICE0, ZDEPOS +REAL(KIND=JPRB) :: ZSUPSAT(KLON) +REAL(KIND=JPRB) :: ZFALL +REAL(KIND=JPRB) :: ZRE_ICE +REAL(KIND=JPRB) :: ZRLDCP +REAL(KIND=JPRB) :: ZQP1ENV + +!---------------------------- +! Arrays for new microphysics +!---------------------------- +INTEGER(KIND=JPIM) :: IPHASE(NCLV) ! marker for water phase of each species + ! 0=vapour, 1=liquid, 2=ice + +INTEGER(KIND=JPIM) :: IMELT(NCLV) ! marks melting linkage for ice categories + ! ice->liquid, snow->rain + +LOGICAL :: LLFALL(NCLV) ! marks falling species + ! LLFALL=0, cloud cover must > 0 for zqx > 0 + ! LLFALL=1, no cloud needed, zqx can evaporate + +LOGICAL :: LLINDEX1(KLON,NCLV) ! index variable +LOGICAL :: LLINDEX3(KLON,NCLV,NCLV) ! index variable +REAL(KIND=JPRB) :: ZMAX +REAL(KIND=JPRB) :: ZRAT +INTEGER(KIND=JPIM) :: IORDER(KLON,NCLV) ! array for sorting explicit terms + +REAL(KIND=JPRB) :: ZLIQFRAC(KLON,KLEV) ! cloud liquid water fraction: ql/(ql+qi) +REAL(KIND=JPRB) :: ZICEFRAC(KLON,KLEV) ! cloud ice water fraction: qi/(ql+qi) +REAL(KIND=JPRB) :: ZQX(KLON,KLEV,NCLV) ! water variables +REAL(KIND=JPRB) :: ZQX0(KLON,KLEV,NCLV) ! water variables at start of scheme +REAL(KIND=JPRB) :: ZQXN(KLON,NCLV) ! new values for zqx at time+1 +REAL(KIND=JPRB) :: ZQXFG(KLON,NCLV) ! first guess values including precip +REAL(KIND=JPRB) :: ZQXNM1(KLON,NCLV) ! new values for zqx at time+1 at level above +REAL(KIND=JPRB) :: ZFLUXQ(KLON,NCLV) ! fluxes convergence of species (needed?) +! Keep the following for possible future total water variance scheme? +!REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature +!REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction +!REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance +!REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) +!REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + +REAL(KIND=JPRB) :: ZPFPLSX(KLON,KLEV+1,NCLV) ! generalized precipitation flux +REAL(KIND=JPRB) :: ZLNEG(KLON,KLEV,NCLV) ! for negative correction diagnostics +REAL(KIND=JPRB) :: ZMELTMAX(KLON) +REAL(KIND=JPRB) :: ZFRZMAX(KLON) +REAL(KIND=JPRB) :: ZICETOT(KLON) + +REAL(KIND=JPRB) :: ZQXN2D(KLON,KLEV,NCLV) ! water variables store + +REAL(KIND=JPRB) :: ZQSMIX(KLON,KLEV) ! diagnostic mixed phase saturation +!REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation +REAL(KIND=JPRB) :: ZQSLIQ(KLON,KLEV) ! liquid water saturation +REAL(KIND=JPRB) :: ZQSICE(KLON,KLEV) ! ice water saturation + +!REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH +!REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq +!REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + +REAL(KIND=JPRB) :: ZFOEEWMT(KLON,KLEV) +REAL(KIND=JPRB) :: ZFOEEW(KLON,KLEV) +REAL(KIND=JPRB) :: ZFOEELIQT(KLON,KLEV) +!REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + +REAL(KIND=JPRB) :: ZDQSLIQDT(KLON), ZDQSICEDT(KLON), ZDQSMIXDT(KLON) +REAL(KIND=JPRB) :: ZCORQSLIQ(KLON) +REAL(KIND=JPRB) :: ZCORQSICE(KLON) +!REAL(KIND=JPRB) :: ZCORQSBIN(KLON) +REAL(KIND=JPRB) :: ZCORQSMIX(KLON) +REAL(KIND=JPRB) :: ZEVAPLIMLIQ(KLON), ZEVAPLIMICE(KLON), ZEVAPLIMMIX(KLON) + +!------------------------------------------------------- +! SOURCE/SINK array for implicit and explicit terms +!------------------------------------------------------- +! a POSITIVE value entered into the arrays is a... +! Source of this variable +! | +! | Sink of this variable +! | | +! V V +! ZSOLQA(JL,IQa,IQb) = explicit terms +! ZSOLQB(JL,IQa,IQb) = implicit terms +! Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is +! a source of NCLDQL and a sink of IQV +! put 'magic' source terms such as PLUDE from +! detrainment into explicit source/sink array diagnognal +! ZSOLQA(NCLDQL,NCLDQL)= -PLUDE +! i.e. A positive value is a sink!????? weird... +!------------------------------------------------------- + +REAL(KIND=JPRB) :: ZSOLQA(KLON,NCLV,NCLV) ! explicit sources and sinks +REAL(KIND=JPRB) :: ZSOLQB(KLON,NCLV,NCLV) ! implicit sources and sinks + ! e.g. microphysical pathways between ice variables. +REAL(KIND=JPRB) :: ZQLHS(KLON,NCLV,NCLV) ! n x n matrix storing the LHS of implicit solver +REAL(KIND=JPRB) :: ZVQX(NCLV) ! fall speeds of three categories +REAL(KIND=JPRB) :: ZEXPLICIT, ZRATIO(KLON,NCLV), ZSINKSUM(KLON,NCLV) + +! for sedimentation source/sink terms +REAL(KIND=JPRB) :: ZFALLSINK(KLON,NCLV) +REAL(KIND=JPRB) :: ZFALLSRCE(KLON,NCLV) + +! for convection detrainment source and subsidence source/sink terms +REAL(KIND=JPRB) :: ZCONVSRCE(KLON,NCLV) +REAL(KIND=JPRB) :: ZCONVSINK(KLON,NCLV) + +! for supersaturation source term from previous timestep +REAL(KIND=JPRB) :: ZPSUPSATSRCE(KLON,NCLV) + +! Numerical fit to wet bulb temperature +REAL(KIND=JPRB),PARAMETER :: ZTW1 = 1329.31_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW2 = 0.0074615_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW3 = 0.85E5_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW4 = 40.637_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW5 = 275.0_JPRB + +REAL(KIND=JPRB) :: ZSUBSAT ! Subsaturation for snow melting term +REAL(KIND=JPRB) :: ZTDMTW0 ! Diff between dry-bulb temperature and + ! temperature when wet-bulb = 0degC + +! Variables for deposition term +REAL(KIND=JPRB) :: ZTCG ! Temperature dependent function for ice PSD +REAL(KIND=JPRB) :: ZFACX1I, ZFACX1S! PSD correction factor +REAL(KIND=JPRB) :: ZAPLUSB,ZCORRFAC,ZCORRFAC2,ZPR02,ZTERM1,ZTERM2 ! for ice dep +REAL(KIND=JPRB) :: ZCLDTOPDIST(KLON) ! Distance from cloud top +REAL(KIND=JPRB) :: ZINFACTOR ! No. of ice nuclei factor for deposition + +! Autoconversion/accretion/riming/evaporation +INTEGER(KIND=JPIM) :: IWARMRAIN +INTEGER(KIND=JPIM) :: IEVAPRAIN +INTEGER(KIND=JPIM) :: IEVAPSNOW +INTEGER(KIND=JPIM) :: IDEPICE +REAL(KIND=JPRB) :: ZRAINACC(KLON) +REAL(KIND=JPRB) :: ZRAINCLD(KLON) +REAL(KIND=JPRB) :: ZSNOWRIME(KLON) +REAL(KIND=JPRB) :: ZSNOWCLD(KLON) +REAL(KIND=JPRB) :: ZESATLIQ +REAL(KIND=JPRB) :: ZFALLCORR +REAL(KIND=JPRB) :: ZLAMBDA +REAL(KIND=JPRB) :: ZEVAP_DENOM +REAL(KIND=JPRB) :: ZCORR2 +REAL(KIND=JPRB) :: ZKA +REAL(KIND=JPRB) :: ZCONST +REAL(KIND=JPRB) :: ZTEMP + +! Rain freezing +LOGICAL :: LLRAINLIQ(KLON) ! True if majority of raindrops are liquid (no ice core) + +!---------------------------- +! End: new microphysics +!---------------------------- + +!---------------------- +! SCM budget statistics +!---------------------- +REAL(KIND=JPRB) :: ZRAIN + +REAL(KIND=JPRB) :: Z_TMP1(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP2(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP3(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP4(KFDIA-KIDIA+1) +!REAL(KIND=JPRB) :: Z_TMP5(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP6(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP7(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMPK(KFDIA-KIDIA+1,KLEV) +!REAL(KIND=JPRB) :: ZCON1,ZCON2 +REAL(KIND=JPRB) :: ZHOOK_HANDLE +REAL(KIND=JPRB) :: ZTMPL,ZTMPI,ZTMPA + +REAL(KIND=JPRB) :: ZMM,ZRR +REAL(KIND=JPRB) :: ZRG(KLON) + +REAL(KIND=JPRB) :: ZBUDCC(KLON,KFLDX) ! extra fields +REAL(KIND=JPRB) :: ZBUDL(KLON,KFLDX) ! extra fields +REAL(KIND=JPRB) :: ZBUDI(KLON,KFLDX) ! extra fields + +REAL(KIND=JPRB) :: ZZSUM, ZZRATIO +REAL(KIND=JPRB) :: ZEPSILON + +REAL(KIND=JPRB) :: ZCOND1, ZQP + + +#include "abor1.intfb.h" + +!DIR$ VFUNCTION EXPHF +#include "fcttre.func.h" +#include "fccld.func.h" + +!=============================================================================== +!IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) +ASSOCIATE(LAERICEAUTO=>YRECLDP%LAERICEAUTO, LAERICESED=>YRECLDP%LAERICESED, & + & LAERLIQAUTOLSP=>YRECLDP%LAERLIQAUTOLSP, LAERLIQCOLL=>YRECLDP%LAERLIQCOLL, & + & LCLDBUDGET=>YRECLDP%LCLDBUDGET, NCLDTOP=>YRECLDP%NCLDTOP, & + & NSSOPT=>YRECLDP%NSSOPT, RAMID=>YRECLDP%RAMID, RAMIN=>YRECLDP%RAMIN, & + & RCCN=>YRECLDP%RCCN, RCLCRIT_LAND=>YRECLDP%RCLCRIT_LAND, & + & RCLCRIT_SEA=>YRECLDP%RCLCRIT_SEA, RCLDIFF=>YRECLDP%RCLDIFF, & + & RCLDIFF_CONVI=>YRECLDP%RCLDIFF_CONVI, RCLDTOPCF=>YRECLDP%RCLDTOPCF, & + & RCL_APB1=>YRECLDP%RCL_APB1, RCL_APB2=>YRECLDP%RCL_APB2, & + & RCL_APB3=>YRECLDP%RCL_APB3, RCL_CDENOM1=>YRECLDP%RCL_CDENOM1, & + & RCL_CDENOM2=>YRECLDP%RCL_CDENOM2, RCL_CDENOM3=>YRECLDP%RCL_CDENOM3, & + & RCL_CONST1I=>YRECLDP%RCL_CONST1I, RCL_CONST1R=>YRECLDP%RCL_CONST1R, & + & RCL_CONST1S=>YRECLDP%RCL_CONST1S, RCL_CONST2I=>YRECLDP%RCL_CONST2I, & + & RCL_CONST2R=>YRECLDP%RCL_CONST2R, RCL_CONST2S=>YRECLDP%RCL_CONST2S, & + & RCL_CONST3I=>YRECLDP%RCL_CONST3I, RCL_CONST3R=>YRECLDP%RCL_CONST3R, & + & RCL_CONST3S=>YRECLDP%RCL_CONST3S, RCL_CONST4I=>YRECLDP%RCL_CONST4I, & + & RCL_CONST4R=>YRECLDP%RCL_CONST4R, RCL_CONST4S=>YRECLDP%RCL_CONST4S, & + & RCL_CONST5I=>YRECLDP%RCL_CONST5I, RCL_CONST5R=>YRECLDP%RCL_CONST5R, & + & RCL_CONST5S=>YRECLDP%RCL_CONST5S, RCL_CONST6I=>YRECLDP%RCL_CONST6I, & + & RCL_CONST6R=>YRECLDP%RCL_CONST6R, RCL_CONST6S=>YRECLDP%RCL_CONST6S, & + & RCL_CONST7S=>YRECLDP%RCL_CONST7S, RCL_CONST8S=>YRECLDP%RCL_CONST8S, & + & RCL_FAC1=>YRECLDP%RCL_FAC1, RCL_FAC2=>YRECLDP%RCL_FAC2, & + & RCL_FZRAB=>YRECLDP%RCL_FZRAB, RCL_KA273=>YRECLDP%RCL_KA273, & + & RCL_KKAAC=>YRECLDP%RCL_KKAAC, RCL_KKAAU=>YRECLDP%RCL_KKAAU, & + & RCL_KKBAC=>YRECLDP%RCL_KKBAC, RCL_KKBAUN=>YRECLDP%RCL_KKBAUN, & + & RCL_KKBAUQ=>YRECLDP%RCL_KKBAUQ, & + & RCL_KK_CLOUD_NUM_LAND=>YRECLDP%RCL_KK_CLOUD_NUM_LAND, & + & RCL_KK_CLOUD_NUM_SEA=>YRECLDP%RCL_KK_CLOUD_NUM_SEA, RCL_X3I=>YRECLDP%RCL_X3I, & + & RCOVPMIN=>YRECLDP%RCOVPMIN, RDENSREF=>YRECLDP%RDENSREF, & + & RDEPLIQREFDEPTH=>YRECLDP%RDEPLIQREFDEPTH, & + & RDEPLIQREFRATE=>YRECLDP%RDEPLIQREFRATE, RICEHI1=>YRECLDP%RICEHI1, & + & RICEHI2=>YRECLDP%RICEHI2, RICEINIT=>YRECLDP%RICEINIT, RKCONV=>YRECLDP%RKCONV, & + & RKOOPTAU=>YRECLDP%RKOOPTAU, RLCRITSNOW=>YRECLDP%RLCRITSNOW, & + & RLMIN=>YRECLDP%RLMIN, RNICE=>YRECLDP%RNICE, RPECONS=>YRECLDP%RPECONS, & + & RPRC1=>YRECLDP%RPRC1, RPRECRHMAX=>YRECLDP%RPRECRHMAX, & + & RSNOWLIN1=>YRECLDP%RSNOWLIN1, RSNOWLIN2=>YRECLDP%RSNOWLIN2, & + & RTAUMEL=>YRECLDP%RTAUMEL, RTHOMO=>YRECLDP%RTHOMO, RVICE=>YRECLDP%RVICE, & + & RVRAIN=>YRECLDP%RVRAIN, RVRFACTOR=>YRECLDP%RVRFACTOR, & + & RVSNOW=>YRECLDP%RVSNOW) + +!=============================================================================== +! 0.0 Beginning of timestep book-keeping +!---------------------------------------------------------------------- + + +!###################################################################### +! 0. *** SET UP CONSTANTS *** +!###################################################################### + +ZEPSILON=100._JPRB*EPSILON(ZEPSILON) + +! --------------------------------------------------------------------- +! Set version of warm-rain autoconversion/accretion +! IWARMRAIN = 1 ! Sundquist +! IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) +! --------------------------------------------------------------------- +IWARMRAIN = 2 +! --------------------------------------------------------------------- +! Set version of rain evaporation +! IEVAPRAIN = 1 ! Sundquist +! IEVAPRAIN = 2 ! Abel and Boutle (2013) +! --------------------------------------------------------------------- +IEVAPRAIN = 2 +! --------------------------------------------------------------------- +! Set version of snow evaporation +! IEVAPSNOW = 1 ! Sundquist +! IEVAPSNOW = 2 ! New +! --------------------------------------------------------------------- +IEVAPSNOW = 1 +! --------------------------------------------------------------------- +! Set version of ice deposition +! IDEPICE = 1 ! Rotstayn (2001) +! IDEPICE = 2 ! New +! --------------------------------------------------------------------- +IDEPICE = 1 + +! --------------------- +! Some simple constants +! --------------------- +ZQTMST = 1.0_JPRB/PTSPHY +ZGDCP = RG/RCPD +ZRDCP = RD/RCPD +ZCONS1A = RCPD/(RLMLT*RG*RTAUMEL) +ZEPSEC = 1.E-14_JPRB +ZRG_R = 1.0_JPRB/RG +ZRLDCP = 1.0_JPRB/(RALSDCP-RALVDCP) + +! Note: Defined in module/yoecldp.F90 +! NCLDQL=1 ! liquid cloud water +! NCLDQI=2 ! ice cloud water +! NCLDQR=3 ! rain water +! NCLDQS=4 ! snow +! NCLDQV=5 ! vapour + +! ----------------------------------------------- +! Define species phase, 0=vapour, 1=liquid, 2=ice +! ----------------------------------------------- +IPHASE(NCLDQV)=0 +IPHASE(NCLDQL)=1 +IPHASE(NCLDQR)=1 +IPHASE(NCLDQI)=2 +IPHASE(NCLDQS)=2 + +! --------------------------------------------------- +! Set up melting/freezing index, +! if an ice category melts/freezes, where does it go? +! --------------------------------------------------- +IMELT(NCLDQV)=-99 +IMELT(NCLDQL)=NCLDQI +IMELT(NCLDQR)=NCLDQS +IMELT(NCLDQI)=NCLDQR +IMELT(NCLDQS)=NCLDQR + +! ----------------------------------------------- +! INITIALIZATION OF OUTPUT TENDENCIES +! ----------------------------------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + tendency_loc%T(JL,JK)=0.0_JPRB + tendency_loc%q(JL,JK)=0.0_JPRB + tendency_loc%a(JL,JK)=0.0_JPRB + ENDDO +ENDDO +DO JM=1,NCLV-1 + DO JK=1,KLEV + DO JL=KIDIA,KFDIA + tendency_loc%cld(JL,JK,JM)=0.0_JPRB + ENDDO + ENDDO +ENDDO + +! ------------------------- +! set up fall speeds in m/s +! ------------------------- +ZVQX(NCLDQV)=0.0_JPRB +ZVQX(NCLDQL)=0.0_JPRB +ZVQX(NCLDQI)=RVICE +ZVQX(NCLDQR)=RVRAIN +ZVQX(NCLDQS)=RVSNOW +LLFALL(:)=.FALSE. +DO JM=1,NCLV + IF (ZVQX(JM)>0.0_JPRB) LLFALL(JM)=.TRUE. ! falling species +ENDDO +! Set LLFALL to false for ice (but ice still sediments!) +! Need to rationalise this at some point +LLFALL(NCLDQI)=.FALSE. + + +!###################################################################### +! 1. *** INITIAL VALUES FOR VARIABLES *** +!###################################################################### + + +! ---------------------- +! non CLV initialization +! ---------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + ZTP1(JL,JK) = PT(JL,JK)+PTSPHY*tendency_tmp%T(JL,JK) + ZQX(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*tendency_tmp%q(JL,JK) + ZQX0(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*tendency_tmp%q(JL,JK) + ZA(JL,JK) = PA(JL,JK)+PTSPHY*tendency_tmp%a(JL,JK) + ZAORIG(JL,JK) = PA(JL,JK)+PTSPHY*tendency_tmp%a(JL,JK) + ENDDO +ENDDO + +! ------------------------------------- +! initialization for CLV family +! ------------------------------------- +DO JM=1,NCLV-1 + DO JK=1,KLEV + DO JL=KIDIA,KFDIA + ZQX(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*tendency_tmp%cld(JL,JK,JM) + ZQX0(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*tendency_tmp%cld(JL,JK,JM) + ENDDO + ENDDO +ENDDO + +!------------- +! zero arrays +!------------- +ZPFPLSX(:,:,:) = 0.0_JPRB ! precip fluxes +ZQXN2D(:,:,:) = 0.0_JPRB ! end of timestep values in 2D +ZLNEG(:,:,:) = 0.0_JPRB ! negative input check +PRAINFRAC_TOPRFZ(:) =0.0_JPRB ! rain fraction at top of refreezing layer +LLRAINLIQ(:) = .TRUE. ! Assume all raindrops are liquid initially + +! ---------------------------------------------------- +! Tidy up very small cloud cover or total cloud water +! ---------------------------------------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + IF (ZQX(JL,JK,NCLDQL)+ZQX(JL,JK,NCLDQI)273K + !--------------------------------------------- + ZALFA=FOEDELTA(ZTP1(JL,JK)) + ZFOEEW(JL,JK)=MIN((ZALFA*FOEELIQ(ZTP1(JL,JK))+ & + & (1.0_JPRB-ZALFA)*FOEEICE(ZTP1(JL,JK)))/PAP(JL,JK),0.5_JPRB) + ZFOEEW(JL,JK)=MIN(0.5_JPRB,ZFOEEW(JL,JK)) + ZQSICE(JL,JK)=ZFOEEW(JL,JK)/(1.0_JPRB-RETV*ZFOEEW(JL,JK)) + + !---------------------------------- + ! liquid water saturation + !---------------------------------- + ZFOEELIQT(JL,JK)=MIN(FOEELIQ(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + ZQSLIQ(JL,JK)=ZFOEELIQT(JL,JK) + ZQSLIQ(JL,JK)=ZQSLIQ(JL,JK)/(1.0_JPRB-RETV*ZQSLIQ(JL,JK)) + +! !---------------------------------- +! ! ice water saturation +! !---------------------------------- +! ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) +! ZQSICE(JL,JK)=ZFOEEICET(JL,JK) +! ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + ENDDO + +ENDDO + +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + + + !------------------------------------------ + ! Ensure cloud fraction is between 0 and 1 + !------------------------------------------ + ZA(JL,JK)=MAX(0.0_JPRB,MIN(1.0_JPRB,ZA(JL,JK))) + + !------------------------------------------------------------------- + ! Calculate liq/ice fractions (no longer a diagnostic relationship) + !------------------------------------------------------------------- + ZLI(JL,JK)=ZQX(JL,JK,NCLDQL)+ZQX(JL,JK,NCLDQI) + IF (ZLI(JL,JK)>RLMIN) THEN + ZLIQFRAC(JL,JK)=ZQX(JL,JK,NCLDQL)/ZLI(JL,JK) + ZICEFRAC(JL,JK)=1.0_JPRB-ZLIQFRAC(JL,JK) + ELSE + ZLIQFRAC(JL,JK)=0.0_JPRB + ZICEFRAC(JL,JK)=0.0_JPRB + ENDIF + + ENDDO +ENDDO + +!###################################################################### +! 2. *** CONSTANTS AND PARAMETERS *** +!###################################################################### +! Calculate L in updrafts of bl-clouds +! Specify QS, P/PS for tropopause (for c2) +! And initialize variables +!------------------------------------------ + +!--------------------------------- +! Find tropopause level (ZTRPAUS) +!--------------------------------- +DO JL=KIDIA,KFDIA + ZTRPAUS(JL)=0.1_JPRB + ZPAPHD(JL)=1.0_JPRB/PAPH(JL,KLEV+1) +ENDDO +DO JK=1,KLEV-1 + DO JL=KIDIA,KFDIA + ZSIG=PAP(JL,JK)*ZPAPHD(JL) + IF (ZSIG>0.1_JPRB.AND.ZSIG<0.4_JPRB.AND.ZTP1(JL,JK)>ZTP1(JL,JK+1)) THEN + ZTRPAUS(JL)=ZSIG + ENDIF + ENDDO +ENDDO + +!----------------------------- +! Reset single level variables +!----------------------------- + +ZANEWM1(:) = 0.0_JPRB +ZDA(:) = 0.0_JPRB +ZCOVPCLR(:) = 0.0_JPRB +ZCOVPMAX(:) = 0.0_JPRB +ZCOVPTOT(:) = 0.0_JPRB +ZCLDTOPDIST(:) = 0.0_JPRB + +!###################################################################### +! 3. *** PHYSICS *** +!###################################################################### + + +!---------------------------------------------------------------------- +! START OF VERTICAL LOOP +!---------------------------------------------------------------------- + +DO JK=NCLDTOP,KLEV + +!---------------------------------------------------------------------- +! 3.0 INITIALIZE VARIABLES +!---------------------------------------------------------------------- + + !--------------------------------- + ! First guess microphysics + !--------------------------------- + DO JM=1,NCLV + DO JL=KIDIA,KFDIA + ZQXFG(JL,JM)=ZQX(JL,JK,JM) + ENDDO + ENDDO + + !--------------------------------- + ! Set KLON arrays to zero + !--------------------------------- + + ZLICLD(:) = 0.0_JPRB + ZRAINAUT(:) = 0.0_JPRB ! currently needed for diags + ZRAINACC(:) = 0.0_JPRB ! currently needed for diags + ZSNOWAUT(:) = 0.0_JPRB ! needed + ZLDEFR(:) = 0.0_JPRB + ZACUST(:) = 0.0_JPRB ! set later when needed + ZQPRETOT(:) = 0.0_JPRB + ZLFINALSUM(:)= 0.0_JPRB + + ! Required for first guess call + ZLCOND1(:) = 0.0_JPRB + ZLCOND2(:) = 0.0_JPRB + ZSUPSAT(:) = 0.0_JPRB + ZLEVAPL(:) = 0.0_JPRB + ZLEVAPI(:) = 0.0_JPRB + + !------------------------------------- + ! solvers for cloud fraction + !------------------------------------- + ZSOLAB(:) = 0.0_JPRB + ZSOLAC(:) = 0.0_JPRB + + !------------------------------------------ + ! reset matrix so missing pathways are set + !------------------------------------------ + ZSOLQB(:,:,:) = 0.0_JPRB + ZSOLQA(:,:,:) = 0.0_JPRB + + !---------------------------------- + ! reset new microphysics variables + !---------------------------------- + ZFALLSRCE(:,:) = 0.0_JPRB + ZFALLSINK(:,:) = 0.0_JPRB + ZCONVSRCE(:,:) = 0.0_JPRB + ZCONVSINK(:,:) = 0.0_JPRB + ZPSUPSATSRCE(:,:) = 0.0_JPRB + ZRATIO(:,:) = 0.0_JPRB + ZICETOT(:) = 0.0_JPRB + + DO JL=KIDIA,KFDIA + + !------------------------- + ! derived variables needed + !------------------------- + + ZDP(JL) = PAPH(JL,JK+1)-PAPH(JL,JK) ! dp + ZGDP(JL) = RG/ZDP(JL) ! g/dp + ZRHO(JL) = PAP(JL,JK)/(RD*ZTP1(JL,JK)) ! p/RT air density + + ZDTGDP(JL) = PTSPHY*ZGDP(JL) ! dt g/dp + ZRDTGDP(JL) = ZDP(JL)*(1.0_JPRB/(PTSPHY*RG)) ! 1/(dt g/dp) + + IF (JK>1) ZDTGDPF(JL) = PTSPHY*RG/(PAP(JL,JK)-PAP(JL,JK-1)) + + !------------------------------------ + ! Calculate dqs/dT correction factor + !------------------------------------ + ! Reminder: RETV=RV/RD-1 + + ! liquid + ZFACW = R5LES/((ZTP1(JL,JK)-R4LES)**2) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEELIQT(JL,JK)) + ZDQSLIQDT(JL) = ZFACW*ZCOR*ZQSLIQ(JL,JK) + ZCORQSLIQ(JL) = 1.0_JPRB+RALVDCP*ZDQSLIQDT(JL) + + ! ice + ZFACI = R5IES/((ZTP1(JL,JK)-R4IES)**2) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEEW(JL,JK)) + ZDQSICEDT(JL) = ZFACI*ZCOR*ZQSICE(JL,JK) + ZCORQSICE(JL) = 1.0_JPRB+RALSDCP*ZDQSICEDT(JL) + + ! diagnostic mixed + ZALFAW = ZFOEALFA(JL,JK) + ZALFAWM(JL) = ZALFAW + ZFAC = ZALFAW*ZFACW+(1.0_JPRB-ZALFAW)*ZFACI + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEEWMT(JL,JK)) + ZDQSMIXDT(JL) = ZFAC*ZCOR*ZQSMIX(JL,JK) + ZCORQSMIX(JL) = 1.0_JPRB+FOELDCPM(ZTP1(JL,JK))*ZDQSMIXDT(JL) + + ! evaporation/sublimation limits + ZEVAPLIMMIX(JL) = MAX((ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSMIX(JL),0.0_JPRB) + ZEVAPLIMLIQ(JL) = MAX((ZQSLIQ(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSLIQ(JL),0.0_JPRB) + ZEVAPLIMICE(JL) = MAX((ZQSICE(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSICE(JL),0.0_JPRB) + + !-------------------------------- + ! in-cloud consensate amount + !-------------------------------- + ZTMPA = 1.0_JPRB/MAX(ZA(JL,JK),ZEPSEC) + ZLIQCLD(JL) = ZQX(JL,JK,NCLDQL)*ZTMPA + ZICECLD(JL) = ZQX(JL,JK,NCLDQI)*ZTMPA + ZLICLD(JL) = ZLIQCLD(JL)+ZICECLD(JL) + + ENDDO + + !------------------------------------------------ + ! Evaporate very small amounts of liquid and ice + !------------------------------------------------ + DO JL=KIDIA,KFDIA + + IF (ZQX(JL,JK,NCLDQL) < RLMIN) THEN + ZSOLQA(JL,NCLDQV,NCLDQL) = ZQX(JL,JK,NCLDQL) + ZSOLQA(JL,NCLDQL,NCLDQV) = -ZQX(JL,JK,NCLDQL) + ENDIF + + IF (ZQX(JL,JK,NCLDQI) < RLMIN) THEN + ZSOLQA(JL,NCLDQV,NCLDQI) = ZQX(JL,JK,NCLDQI) + ZSOLQA(JL,NCLDQI,NCLDQV) = -ZQX(JL,JK,NCLDQI) + ENDIF + + ENDDO + + !--------------------------------------------------------------------- + ! 3.1 ICE SUPERSATURATION ADJUSTMENT + !--------------------------------------------------------------------- + ! Note that the supersaturation adjustment is made with respect to + ! liquid saturation: when T>0C + ! ice saturation: when T<0C + ! with an adjustment made to allow for ice + ! supersaturation in the clear sky + ! Note also that the KOOP factor automatically clips the supersaturation + ! to a maximum set by the liquid water saturation mixing ratio + ! important for temperatures near to but below 0C + !----------------------------------------------------------------------- + +!DIR$ NOFUSION + DO JL=KIDIA,KFDIA + + !----------------------------------- + ! 3.1.1 Supersaturation limit (from Koop) + !----------------------------------- + ! Needs to be set for all temperatures + ZFOKOOP(JL)=FOKOOP(ZTP1(JL,JK)) + ENDDO + DO JL=KIDIA,KFDIA + + IF (ZTP1(JL,JK)>=RTT .OR. NSSOPT==0) THEN + ZFAC = 1.0_JPRB + ZFACI = 1.0_JPRB + ELSE + ZFAC = ZA(JL,JK)+ZFOKOOP(JL)*(1.0_JPRB-ZA(JL,JK)) + ZFACI = PTSPHY/RKOOPTAU + ENDIF + + !------------------------------------------------------------------- + ! 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + ! correction factor + ! [#Note: QSICE or QSLIQ] + !------------------------------------------------------------------- + + ! Calculate supersaturation to add to cloud + IF (ZA(JL,JK) > 1.0_JPRB-RAMIN) THEN + ZSUPSAT(JL) = MAX((ZQX(JL,JK,NCLDQV)-ZFAC*ZQSICE(JL,JK))/ZCORQSICE(JL)& + & ,0.0_JPRB) + ELSE + ! Calculate environmental humidity supersaturation + ZQP1ENV = (ZQX(JL,JK,NCLDQV) - ZA(JL,JK)*ZQSICE(JL,JK))/ & + & MAX(1.0_JPRB-ZA(JL,JK),ZEPSILON) + !& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + ZSUPSAT(JL) = MAX((1.0_JPRB-ZA(JL,JK))*(ZQP1ENV-ZFAC*ZQSICE(JL,JK))& + & /ZCORQSICE(JL),0.0_JPRB) + ENDIF + + !------------------------------------------------------------------- + ! Here the supersaturation is turned into liquid water + ! However, if the temperature is below the threshold for homogeneous + ! freezing then the supersaturation is turned instantly to ice. + !-------------------------------------------------------------------- + + IF (ZSUPSAT(JL) > ZEPSEC) THEN + + IF (ZTP1(JL,JK) > RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)+ZSUPSAT(JL) + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)-ZSUPSAT(JL) + ! Include liquid in first guess + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZSUPSAT(JL) + ELSE + ! Turn supersaturation into ice water + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)+ZSUPSAT(JL) + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)-ZSUPSAT(JL) + ! Add ice to first guess for deposition term + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZSUPSAT(JL) + ENDIF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC(JL) = (1.0_JPRB-ZA(JL,JK))*ZFACI + + ENDIF + + !------------------------------------------------------- + ! 3.1.3 Include supersaturation from previous timestep + ! (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + !------------------------------------------------------- + IF (PSUPSAT(JL,JK)>ZEPSEC) THEN + IF (ZTP1(JL,JK) > RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(JL,NCLDQL,NCLDQL) = ZSOLQA(JL,NCLDQL,NCLDQL)+PSUPSAT(JL,JK) + ZPSUPSATSRCE(JL,NCLDQL) = PSUPSAT(JL,JK) + ! Add liquid to first guess for deposition term + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+PSUPSAT(JL,JK) + ! Store cloud budget diagnostics if required + ELSE + ! Turn supersaturation into ice water + ZSOLQA(JL,NCLDQI,NCLDQI) = ZSOLQA(JL,NCLDQI,NCLDQI)+PSUPSAT(JL,JK) + ZPSUPSATSRCE(JL,NCLDQI) = PSUPSAT(JL,JK) + ! Add ice to first guess for deposition term + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+PSUPSAT(JL,JK) + ! Store cloud budget diagnostics if required + ENDIF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC(JL)=(1.0_JPRB-ZA(JL,JK))*ZFACI + ! Store cloud budget diagnostics if required + ENDIF + + ENDDO ! on JL + + !--------------------------------------------------------------------- + ! 3.2 DETRAINMENT FROM CONVECTION + !--------------------------------------------------------------------- + ! * Diagnostic T-ice/liq split retained for convection + ! Note: This link is now flexible and a future convection + ! scheme can detrain explicit seperate budgets of: + ! cloud water, ice, rain and snow + ! * There is no (1-ZA) multiplier term on the cloud detrainment + ! term, since is now written in mass-flux terms + ! [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + !--------------------------------------------------------------------- + IF (JK < KLEV .AND. JK>=NCLDTOP) THEN + + DO JL=KIDIA,KFDIA + + PLUDE(JL,JK)=PLUDE(JL,JK)*ZDTGDP(JL) + + IF(LDCUM(JL).AND.PLUDE(JL,JK) > RLMIN.AND.PLU(JL,JK+1)> ZEPSEC) THEN + + ZSOLAC(JL)=ZSOLAC(JL)+PLUDE(JL,JK)/PLU(JL,JK+1) + ! *diagnostic temperature split* + ZALFAW = ZFOEALFA(JL,JK) + ZCONVSRCE(JL,NCLDQL) = ZALFAW*PLUDE(JL,JK) + ZCONVSRCE(JL,NCLDQI) = (1.0_JPRB-ZALFAW)*PLUDE(JL,JK) + ZSOLQA(JL,NCLDQL,NCLDQL) = ZSOLQA(JL,NCLDQL,NCLDQL)+ZCONVSRCE(JL,NCLDQL) + ZSOLQA(JL,NCLDQI,NCLDQI) = ZSOLQA(JL,NCLDQI,NCLDQI)+ZCONVSRCE(JL,NCLDQI) + + ELSE + + PLUDE(JL,JK)=0.0_JPRB + + ENDIF + ! *convective snow detrainment source + IF (LDCUM(JL)) ZSOLQA(JL,NCLDQS,NCLDQS) = ZSOLQA(JL,NCLDQS,NCLDQS) + PSNDE(JL,JK)*ZDTGDP(JL) + + ENDDO + + ENDIF ! JK NCLDTOP) THEN + + DO JL=KIDIA,KFDIA + ZMF(JL)=MAX(0.0_JPRB,(PMFU(JL,JK)+PMFD(JL,JK))*ZDTGDP(JL)) + ZACUST(JL)=ZMF(JL)*ZANEWM1(JL) + ENDDO + + DO JM=1,NCLV + IF (.NOT.LLFALL(JM).AND.IPHASE(JM)>0) THEN + DO JL=KIDIA,KFDIA + ZLCUST(JL,JM)=ZMF(JL)*ZQXNM1(JL,JM) + ! record total flux for enthalpy budget: + ZCONVSRCE(JL,JM)=ZCONVSRCE(JL,JM)+ZLCUST(JL,JM) + ENDDO + ENDIF + ENDDO + + ! Now have to work out how much liquid evaporates at arrival point + ! since there is no prognostic memory for in-cloud humidity, i.e. + ! we always assume cloud is saturated. + + DO JL=KIDIA,KFDIA + ZDTDP=ZRDCP*0.5_JPRB*(ZTP1(JL,JK-1)+ZTP1(JL,JK))/PAPH(JL,JK) + ZDTFORC = ZDTDP*(PAP(JL,JK)-PAP(JL,JK-1)) + ![#Note: Diagnostic mixed phase should be replaced below] + ZDQS(JL)=ZANEWM1(JL)*ZDTFORC*ZDQSMIXDT(JL) + ENDDO + + DO JM=1,NCLV + IF (.NOT.LLFALL(JM).AND.IPHASE(JM)>0) THEN + DO JL=KIDIA,KFDIA + ZLFINAL=MAX(0.0_JPRB,ZLCUST(JL,JM)-ZDQS(JL)) !lim to zero + ! no supersaturation allowed incloud ---V + ZEVAP=MIN((ZLCUST(JL,JM)-ZLFINAL),ZEVAPLIMMIX(JL)) +! ZEVAP=0.0_JPRB + ZLFINAL=ZLCUST(JL,JM)-ZEVAP + ZLFINALSUM(JL)=ZLFINALSUM(JL)+ZLFINAL ! sum + + ZSOLQA(JL,JM,JM) = ZSOLQA(JL,JM,JM)+ZLCUST(JL,JM) ! whole sum + ZSOLQA(JL,NCLDQV,JM) = ZSOLQA(JL,NCLDQV,JM)+ZEVAP + ZSOLQA(JL,JM,NCLDQV) = ZSOLQA(JL,JM,NCLDQV)-ZEVAP + ENDDO + ENDIF + ENDDO + + ! Reset the cloud contribution if no cloud water survives to this level: + DO JL=KIDIA,KFDIA + IF (ZLFINALSUM(JL)NCLDTOP + + !--------------------------------------------------------------------- + ! Subsidence sink of cloud to the layer below + ! (Implicit - re. CFL limit on convective mass flux) + !--------------------------------------------------------------------- + + DO JL=KIDIA,KFDIA + + IF(JK 0 .AND. PLUDE(JL,JK) > ZEPSEC)& + & ZLDIFDT(JL)=RCLDIFF_CONVI*ZLDIFDT(JL) + ENDDO + + ! At the moment, works on mixed RH profile and partitioned ice/liq fraction + ! so that it is similar to previous scheme + ! Should apply RHw for liquid cloud and RHi for ice cloud separately + DO JL=KIDIA,KFDIA + IF(ZLI(JL,JK) > ZEPSEC) THEN + ! Calculate environmental humidity +! ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& +! & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) +! ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB) + ZLEROS=ZA(JL,JK)*ZE + ZLEROS=MIN(ZLEROS,ZEVAPLIMMIX(JL)) + ZLEROS=MIN(ZLEROS,ZLI(JL,JK)) + ZAEROS=ZLEROS/ZLICLD(JL) !if linear term + + ! Erosion is -ve LINEAR in L,A + ZSOLAC(JL)=ZSOLAC(JL)-ZAEROS !linear + + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)+ZLIQFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)-ZLIQFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)+ZICEFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)-ZICEFRAC(JL,JK)*ZLEROS + + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + !---------------------------------------------------------------------- + ! calculate dqs/dt + ! Note: For the separate prognostic Qi and Ql, one would ideally use + ! Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + ! forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + ! These would then instantaneous freeze if T<-38C or lead to ice growth + ! by deposition in warmer mixed phase clouds. However, since we do + ! not have a separate prognostic equation for in-cloud humidity or a + ! statistical scheme approach in place, the depositional growth of ice + ! in the mixed phase can not be modelled and we resort to supersaturation + ! wrt ice instanteously converting to ice over one timestep + ! (see Tompkins et al. QJRMS 2007 for details) + ! Thus for the initial implementation the diagnostic mixed phase is + ! retained for the moment, and the level of approximation noted. + !---------------------------------------------------------------------- + + DO JL=KIDIA,KFDIA + ZDTDP = ZRDCP*ZTP1(JL,JK)/PAP(JL,JK) + ZDPMXDT = ZDP(JL)*ZQTMST + ZMFDN = 0.0_JPRB + IF(JK < KLEV) ZMFDN=PMFU(JL,JK+1)+PMFD(JL,JK+1) + ZWTOT = PVERVEL(JL,JK)+0.5_JPRB*RG*(PMFU(JL,JK)+PMFD(JL,JK)+ZMFDN) + ZWTOT = MIN(ZDPMXDT,MAX(-ZDPMXDT,ZWTOT)) + ZZZDT = PHRSW(JL,JK)+PHRLW(JL,JK) + ZDTDIAB = MIN(ZDPMXDT*ZDTDP,MAX(-ZDPMXDT*ZDTDP,ZZZDT))& + & *PTSPHY+RALFDCP*ZLDEFR(JL) +! Note: ZLDEFR should be set to the difference between the mixed phase functions +! in the convection and cloud scheme, but this is not calculated, so is zero and +! the functions must be the same + ZDTFORC = ZDTDP*ZWTOT*PTSPHY+ZDTDIAB + ZQOLD(JL) = ZQSMIX(JL,JK) + ZTOLD(JL) = ZTP1(JL,JK) + ZTP1(JL,JK) = ZTP1(JL,JK)+ZDTFORC + ZTP1(JL,JK) = MAX(ZTP1(JL,JK),160.0_JPRB) + LLFLAG(JL) = .TRUE. + ENDDO + + ! Formerly a call to CUADJTQ(..., ICALL=5) + DO JL=KIDIA,KFDIA + ZQP = 1.0_JPRB/PAP(JL,JK) + ZQSAT = FOEEWM(ZTP1(JL,JK))*ZQP + ZQSAT = MIN(0.5_JPRB,ZQSAT) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV *ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND = (ZQSMIX(JL,JK)-ZQSAT)/(1.0_JPRB+ZQSAT*ZCOR*FOEDEM(ZTP1(JL,JK))) + ZTP1(JL,JK) = ZTP1(JL,JK)+FOELDCPM(ZTP1(JL,JK))*ZCOND + ZQSMIX(JL,JK) = ZQSMIX(JL,JK)-ZCOND + ZQSAT = FOEEWM(ZTP1(JL,JK))*ZQP + ZQSAT = MIN(0.5_JPRB,ZQSAT) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV *ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND1= (ZQSMIX(JL,JK)-ZQSAT)/(1.0_JPRB+ZQSAT*ZCOR*FOEDEM(ZTP1(JL,JK))) + ZTP1(JL,JK) = ZTP1(JL,JK)+FOELDCPM(ZTP1(JL,JK))*ZCOND1 + ZQSMIX(JL,JK) = ZQSMIX(JL,JK)-ZCOND1 + ENDDO + + DO JL=KIDIA,KFDIA + ZDQS(JL) = ZQSMIX(JL,JK)-ZQOLD(JL) + ZQSMIX(JL,JK) = ZQOLD(JL) + ZTP1(JL,JK) = ZTOLD(JL) + ENDDO + + !---------------------------------------------------------------------- + ! 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + ! ---------------------------------------------------------------------- + ! Erosion term is LINEAR in L + ! Changed to be uniform distribution in cloud region + + DO JL=KIDIA,KFDIA + + ! Previous function based on DELTA DISTRIBUTION in cloud: + IF (ZDQS(JL) > 0.0_JPRB) THEN +! If subsidence evaporation term is turned off, then need to use updated +! liquid and cloud here? +! ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + ZLEVAP = ZA(JL,JK)*MIN(ZDQS(JL),ZLICLD(JL)) + ZLEVAP = MIN(ZLEVAP,ZEVAPLIMMIX(JL)) + ZLEVAP = MIN(ZLEVAP,MAX(ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB)) + + ! For first guess call + ZLEVAPL(JL) = ZLIQFRAC(JL,JK)*ZLEVAP + ZLEVAPI(JL) = ZICEFRAC(JL,JK)*ZLEVAP + + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)+ZLIQFRAC(JL,JK)*ZLEVAP + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)-ZLIQFRAC(JL,JK)*ZLEVAP + + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)+ZICEFRAC(JL,JK)*ZLEVAP + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)-ZICEFRAC(JL,JK)*ZLEVAP + + ENDIF + + ENDDO + + !---------------------------------------------------------------------- + ! 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + !---------------------------------------------------------------------- + ! (1) Increase of cloud water in existing clouds + DO JL=KIDIA,KFDIA + IF(ZA(JL,JK) > ZEPSEC.AND.ZDQS(JL) <= -RLMIN) THEN + + ZLCOND1(JL)=MAX(-ZDQS(JL),0.0_JPRB) !new limiter + +!old limiter (significantly improves upper tropospheric humidity rms) + IF(ZA(JL,JK) > 0.99_JPRB) THEN + ZCOR=1.0_JPRB/(1.0_JPRB-RETV*ZQSMIX(JL,JK)) + ZCDMAX=(ZQX(JL,JK,NCLDQV)-ZQSMIX(JL,JK))/& + & (1.0_JPRB+ZCOR*ZQSMIX(JL,JK)*FOEDEM(ZTP1(JL,JK))) + ELSE + ZCDMAX=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/ZA(JL,JK) + ENDIF + ZLCOND1(JL)=MAX(MIN(ZLCOND1(JL),ZCDMAX),0.0_JPRB) +! end old limiter + + ZLCOND1(JL)=ZA(JL,JK)*ZLCOND1(JL) + IF(ZLCOND1(JL) < RLMIN) ZLCOND1(JL)=0.0_JPRB + + !------------------------------------------------------------------------- + ! All increase goes into liquid unless so cold cloud homogeneously freezes + ! Include new liquid formation in first guess value, otherwise liquid + ! remains at cold temperatures until next timestep. + !------------------------------------------------------------------------- + IF (ZTP1(JL,JK)>RTHOMO) THEN + ZSOLQA(JL,NCLDQL,NCLDQV)=ZSOLQA(JL,NCLDQL,NCLDQV)+ZLCOND1(JL) + ZSOLQA(JL,NCLDQV,NCLDQL)=ZSOLQA(JL,NCLDQV,NCLDQL)-ZLCOND1(JL) + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZLCOND1(JL) + ELSE + ZSOLQA(JL,NCLDQI,NCLDQV)=ZSOLQA(JL,NCLDQI,NCLDQV)+ZLCOND1(JL) + ZSOLQA(JL,NCLDQV,NCLDQI)=ZSOLQA(JL,NCLDQV,NCLDQI)-ZLCOND1(JL) + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZLCOND1(JL) + ENDIF + ENDIF + ENDDO + + ! (2) Generation of new clouds (da/dt>0) + + DO JL=KIDIA,KFDIA + + IF(ZDQS(JL) <= -RLMIN .AND. ZA(JL,JK)<1.0_JPRB-ZEPSEC) THEN + + !--------------------------- + ! Critical relative humidity + !--------------------------- + ZRHC=RAMID + ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + ! Increase RHcrit to 1.0 towards the surface (eta>0.8) + IF(ZSIGK > 0.8_JPRB) THEN + ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + ENDIF + +! Commented out for CY37R1 to reduce humidity in high trop and strat +! ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above +! ZBOTT=ZTRPAUS(JL)+0.2_JPRB +! IF(ZSIGK < ZBOTT) THEN +! ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) +! ENDIF + + !--------------------------- + ! Supersaturation options + !--------------------------- + IF (NSSOPT==0) THEN + ! No scheme + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSICE(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZQE=MAX(0.0_JPRB,ZQE) + ELSEIF (NSSOPT==1) THEN + ! Tompkins + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSICE(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZQE=MAX(0.0_JPRB,ZQE) + ELSEIF (NSSOPT==2) THEN + ! Lohmann and Karcher + ZQE=ZQX(JL,JK,NCLDQV) + ELSEIF (NSSOPT==3) THEN + ! Gierens + ZQE=ZQX(JL,JK,NCLDQV)+ZLI(JL,JK) + ENDIF + + IF (ZTP1(JL,JK)>=RTT .OR. NSSOPT==0) THEN + ! No ice supersaturation allowed + ZFAC=1.0_JPRB + ELSE + ! Ice supersaturation + ZFAC=ZFOKOOP(JL) + ENDIF + + IF(ZQE >= ZRHC*ZQSICE(JL,JK)*ZFAC.AND.ZQERTHOMO) THEN + ZSOLQA(JL,NCLDQL,NCLDQV)=ZSOLQA(JL,NCLDQL,NCLDQV)+ZLCOND2(JL) + ZSOLQA(JL,NCLDQV,NCLDQL)=ZSOLQA(JL,NCLDQV,NCLDQL)-ZLCOND2(JL) + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZLCOND2(JL) + ELSE ! homogeneous freezing + ZSOLQA(JL,NCLDQI,NCLDQV)=ZSOLQA(JL,NCLDQI,NCLDQV)+ZLCOND2(JL) + ZSOLQA(JL,NCLDQV,NCLDQI)=ZSOLQA(JL,NCLDQV,NCLDQI)-ZLCOND2(JL) + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZLCOND2(JL) + ENDIF + + ENDIF + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 3.7 Growth of ice by vapour deposition + !---------------------------------------------------------------------- + ! Following Rotstayn et al. 2001: + ! does not use the ice nuclei number from cloudaer.F90 + ! but rather a simple Meyers et al. 1992 form based on the + ! supersaturation and assuming clouds are saturated with + ! respect to liquid water (well mixed), (or Koop adjustment) + ! Growth considered as sink of liquid water if present so + ! Bergeron-Findeisen adjustment in autoconversion term no longer needed + !---------------------------------------------------------------------- + + !-------------------------------------------------------- + !- + !- Ice deposition following Rotstayn et al. (2001) + !- (monodisperse ice particle size distribution) + !- + !-------------------------------------------------------- + IF (IDEPICE == 1) THEN + + DO JL=KIDIA,KFDIA + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JL,JK-1) < RCLDTOPCF .AND. ZA(JL,JK) >= RCLDTOPCF) THEN + ZCLDTOPDIST(JL) = 0.0_JPRB + ELSE + ZCLDTOPDIST(JL) = ZCLDTOPDIST(JL) + ZDP(JL)/(ZRHO(JL)*RG) + ENDIF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JL,JK)RLMIN) THEN ! T<273K + + ZVPICE=FOEEICE(ZTP1(JL,JK))*RV/RD + ZVPLIQ=ZVPICE*ZFOKOOP(JL) + ZICENUCLEI(JL)=1000.0_JPRB*EXP(12.96_JPRB*(ZVPLIQ-ZVPICE)/ZVPLIQ-0.639_JPRB) + + !------------------------------------------------ + ! 2.4e-2 is conductivity of air + ! 8.8 = 700**1/3 = density of ice to the third + !------------------------------------------------ + ZADD=RLSTT*(RLSTT/(RV*ZTP1(JL,JK))-1.0_JPRB)/(2.4E-2_JPRB*ZTP1(JL,JK)) + ZBDD=RV*ZTP1(JL,JK)*PAP(JL,JK)/(2.21_JPRB*ZVPICE) + ZCVDS=7.8_JPRB*(ZICENUCLEI(JL)/ZRHO(JL))**0.666_JPRB*(ZVPLIQ-ZVPICE) / & + & (8.87_JPRB*(ZADD+ZBDD)*ZVPICE) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0=MAX(ZICECLD(JL), ZICENUCLEI(JL)*RICEINIT/ZRHO(JL)) + + !------------------ + ! new value of ice: + !------------------ + ZINEW=(0.666_JPRB*ZCVDS*PTSPHY+ZICE0**0.666_JPRB)**1.5_JPRB + + !--------------------------- + ! grid-mean deposition rate: + !--------------------------- + ZDEPOS=MAX(ZA(JL,JK)*(ZINEW-ZICE0),0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS=MIN(ZDEPOS,ZQXFG(JL,NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- +! ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI(JL)/15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB-ZINFACTOR)* & + & (RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH),1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(JL,NCLDQI,NCLDQL)=ZSOLQA(JL,NCLDQI,NCLDQL)+ZDEPOS + ZSOLQA(JL,NCLDQL,NCLDQI)=ZSOLQA(JL,NCLDQL,NCLDQI)-ZDEPOS + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZDEPOS + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)-ZDEPOS + + ENDIF + ENDDO + + !-------------------------------------------------------- + !- + !- Ice deposition assuming ice PSD + !- + !-------------------------------------------------------- + ELSEIF (IDEPICE == 2) THEN + + DO JL=KIDIA,KFDIA + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JL,JK-1) < RCLDTOPCF .AND. ZA(JL,JK) >= RCLDTOPCF) THEN + ZCLDTOPDIST(JL) = 0.0_JPRB + ELSE + ZCLDTOPDIST(JL) = ZCLDTOPDIST(JL) + ZDP(JL)/(ZRHO(JL)*RG) + ENDIF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JL,JK)RLMIN) THEN ! T<273K + + ZVPICE = FOEEICE(ZTP1(JL,JK))*RV/RD + ZVPLIQ = ZVPICE*ZFOKOOP(JL) + ZICENUCLEI(JL)=1000.0_JPRB*EXP(12.96_JPRB*(ZVPLIQ-ZVPICE)/ZVPLIQ-0.639_JPRB) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0=MAX(ZICECLD(JL), ZICENUCLEI(JL)*RICEINIT/ZRHO(JL)) + + ! Particle size distribution + ZTCG = 1.0_JPRB + ZFACX1I = 1.0_JPRB + + ZAPLUSB = RCL_APB1*ZVPICE-RCL_APB2*ZVPICE*ZTP1(JL,JK)+ & + & PAP(JL,JK)*RCL_APB3*ZTP1(JL,JK)**3._JPRB + ZCORRFAC = (1.0_JPRB/ZRHO(JL))**0.5_JPRB + ZCORRFAC2 = ((ZTP1(JL,JK)/273.0_JPRB)**1.5_JPRB) & + & *(393.0_JPRB/(ZTP1(JL,JK)+120.0_JPRB)) + + ZPR02 = ZRHO(JL)*ZICE0*RCL_CONST1I/(ZTCG*ZFACX1I) + + ZTERM1 = (ZVPLIQ-ZVPICE)*ZTP1(JL,JK)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG* & + & RCL_CONST2I*ZFACX1I/(ZRHO(JL)*ZAPLUSB*ZVPICE) + ZTERM2 = 0.65_JPRB*RCL_CONST6I*ZPR02**RCL_CONST4I+RCL_CONST3I & + & *ZCORRFAC**0.5_JPRB*ZRHO(JL)**0.5_JPRB & + & *ZPR02**RCL_CONST5I/ZCORRFAC2**0.5_JPRB + + ZDEPOS = MAX(ZA(JL,JK)*ZTERM1*ZTERM2*PTSPHY,0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS=MIN(ZDEPOS,ZQXFG(JL,NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI(JL)/15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB-ZINFACTOR)* & + & (RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH),1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(JL,NCLDQI,NCLDQL) = ZSOLQA(JL,NCLDQI,NCLDQL)+ZDEPOS + ZSOLQA(JL,NCLDQL,NCLDQI) = ZSOLQA(JL,NCLDQL,NCLDQI)-ZDEPOS + ZQXFG(JL,NCLDQI) = ZQXFG(JL,NCLDQI)+ZDEPOS + ZQXFG(JL,NCLDQL) = ZQXFG(JL,NCLDQL)-ZDEPOS + ENDIF + ENDDO + + ENDIF ! on IDEPICE + + !###################################################################### + ! 4 *** PRECIPITATION PROCESSES *** + !###################################################################### + + !---------------------------------- + ! revise in-cloud consensate amount + !---------------------------------- + DO JL=KIDIA,KFDIA + ZTMPA = 1.0_JPRB/MAX(ZA(JL,JK),ZEPSEC) + ZLIQCLD(JL) = ZQXFG(JL,NCLDQL)*ZTMPA + ZICECLD(JL) = ZQXFG(JL,NCLDQI)*ZTMPA + ZLICLD(JL) = ZLIQCLD(JL)+ZICECLD(JL) + ENDDO + + !---------------------------------------------------------------------- + ! 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + ! now that rain, snow, graupel species are prognostic + ! the precipitation flux can be defined directly level by level + ! There is no vertical memory required from the flux variable + !---------------------------------------------------------------------- + + DO JM = 1,NCLV + IF (LLFALL(JM) .OR. JM == NCLDQI) THEN + DO JL=KIDIA,KFDIA + !------------------------ + ! source from layer above + !------------------------ + IF (JK > NCLDTOP) THEN + ZFALLSRCE(JL,JM) = ZPFPLSX(JL,JK,JM)*ZDTGDP(JL) + ZSOLQA(JL,JM,JM) = ZSOLQA(JL,JM,JM)+ZFALLSRCE(JL,JM) + ZQXFG(JL,JM) = ZQXFG(JL,JM)+ZFALLSRCE(JL,JM) + ! use first guess precip----------V + ZQPRETOT(JL) = ZQPRETOT(JL)+ZQXFG(JL,JM) + ENDIF + !------------------------------------------------- + ! sink to next layer, constant fall speed + !------------------------------------------------- + ! if aerosol effect then override + ! note that for T>233K this is the same as above. + IF (LAERICESED .AND. JM == NCLDQI) THEN + ZRE_ICE=PRE_ICE(JL,JK) + ! The exponent value is from + ! Morrison et al. JAS 2005 Appendix + ZVQX(NCLDQI) = 0.002_JPRB*ZRE_ICE**1.0_JPRB + ENDIF + ZFALL=ZVQX(JM)*ZRHO(JL) + !------------------------------------------------- + ! modified by Heymsfield and Iaquinta JAS 2000 + !------------------------------------------------- + ! ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + ! &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + ZFALLSINK(JL,JM)=ZDTGDP(JL)*ZFALL + ! Cloud budget diagnostic stored at end as implicit + ENDDO ! jl + ENDIF ! LLFALL + ENDDO ! jm + + !--------------------------------------------------------------- + ! Precip cover overlap using MAX-RAN Overlap + ! Since precipitation is now prognostic we must + ! 1) apply an arbitrary minimum coverage (0.3) if precip>0 + ! 2) abandon the 2-flux clr/cld treatment + ! 3) Thus, since we have no memory of the clear sky precip + ! fraction, we mimic the previous method by reducing + ! ZCOVPTOT(JL), which has the memory, proportionally with + ! the precip evaporation rate, taking cloud fraction + ! into account + ! #3 above leads to much smoother vertical profiles of + ! precipitation fraction than the Klein-Jakob scheme which + ! monotonically increases precip fraction and then resets + ! it to zero in a step function once clear-sky precip reaches + ! zero. + !--------------------------------------------------------------- + DO JL=KIDIA,KFDIA + IF (ZQPRETOT(JL)>ZEPSEC) THEN + ZCOVPTOT(JL) = 1.0_JPRB - ((1.0_JPRB-ZCOVPTOT(JL))*& + & (1.0_JPRB - MAX(ZA(JL,JK),ZA(JL,JK-1)))/& + & (1.0_JPRB - MIN(ZA(JL,JK-1),1.0_JPRB-1.E-06_JPRB)) ) + ZCOVPTOT(JL) = MAX(ZCOVPTOT(JL),RCOVPMIN) + ZCOVPCLR(JL) = MAX(0.0_JPRB,ZCOVPTOT(JL)-ZA(JL,JK)) ! clear sky proportion + ZRAINCLD(JL) = ZQXFG(JL,NCLDQR)/ZCOVPTOT(JL) + ZSNOWCLD(JL) = ZQXFG(JL,NCLDQS)/ZCOVPTOT(JL) + ZCOVPMAX(JL) = MAX(ZCOVPTOT(JL),ZCOVPMAX(JL)) + ELSE + ZRAINCLD(JL) = 0.0_JPRB + ZSNOWCLD(JL) = 0.0_JPRB + ZCOVPTOT(JL) = 0.0_JPRB ! no flux - reset cover + ZCOVPCLR(JL) = 0.0_JPRB ! reset clear sky proportion + ZCOVPMAX(JL) = 0.0_JPRB ! reset max cover for ZZRH calc + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.3a AUTOCONVERSION TO SNOW + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + IF(ZTP1(JL,JK) <= RTT) THEN + !----------------------------------------------------- + ! Snow Autoconversion rate follow Lin et al. 1983 + !----------------------------------------------------- + IF (ZICECLD(JL)>ZEPSEC) THEN + + ZZCO=PTSPHY*RSNOWLIN1*EXP(RSNOWLIN2*(ZTP1(JL,JK)-RTT)) + + IF (LAERICEAUTO) THEN + ZLCRIT=PICRIT_AER(JL,JK) + ! 0.3 = N**0.333 with N=0.027 + ZZCO=ZZCO*(RNICE/PNICE(JL,JK))**0.333_JPRB + ELSE + ZLCRIT=RLCRITSNOW + ENDIF + + ZSNOWAUT(JL)=ZZCO*(1.0_JPRB-EXP(-(ZICECLD(JL)/ZLCRIT)**2)) + ZSOLQB(JL,NCLDQS,NCLDQI)=ZSOLQB(JL,NCLDQS,NCLDQI)+ZSNOWAUT(JL) + + ENDIF + ENDIF + + !---------------------------------------------------------------------- + ! 4.3b AUTOCONVERSION WARM CLOUDS + ! Collection and accretion will require separate treatment + ! but for now we keep this simple treatment + !---------------------------------------------------------------------- + + IF (ZLIQCLD(JL)>ZEPSEC) THEN + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Sundqvist (1989) + !- + !-------------------------------------------------------- + IF (IWARMRAIN == 1) THEN + + ZZCO=RKCONV*PTSPHY + + IF (LAERLIQAUTOLSP) THEN + ZLCRIT=PLCRIT_AER(JL,JK) + ! 0.3 = N**0.333 with N=125 cm-3 + ZZCO=ZZCO*(RCCN/PCCN(JL,JK))**0.333_JPRB + ELSE + ! Modify autoconversion threshold dependent on: + ! land (polluted, high CCN, smaller droplets, higher threshold) + ! sea (clean, low CCN, larger droplets, lower threshold) + IF (PLSM(JL) > 0.5_JPRB) THEN + ZLCRIT = RCLCRIT_LAND ! land + ELSE + ZLCRIT = RCLCRIT_SEA ! ocean + ENDIF + ENDIF + + !------------------------------------------------------------------ + ! Parameters for cloud collection by rain and snow. + ! Note that with new prognostic variable it is now possible + ! to REPLACE this with an explicit collection parametrization + !------------------------------------------------------------------ + ZPRECIP=(ZPFPLSX(JL,JK,NCLDQS)+ZPFPLSX(JL,JK,NCLDQR))/MAX(ZEPSEC,ZCOVPTOT(JL)) + ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB)) +! ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& +! &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + IF (LAERLIQCOLL) THEN + ! 5.0 = N**0.333 with N=125 cm-3 + ZCFPR=ZCFPR*(RCCN/PCCN(JL,JK))**0.333_JPRB + ENDIF + + ZZCO=ZZCO*ZCFPR + ZLCRIT=ZLCRIT/MAX(ZCFPR,ZEPSEC) + + IF(ZLIQCLD(JL)/ZLCRIT < 20.0_JPRB )THEN ! Security for exp for some compilers + ZRAINAUT(JL)=ZZCO*(1.0_JPRB-EXP(-(ZLIQCLD(JL)/ZLCRIT)**2)) + ELSE + ZRAINAUT(JL)=ZZCO + ENDIF + + ! rain freezes instantly + IF(ZTP1(JL,JK) <= RTT) THEN + ZSOLQB(JL,NCLDQS,NCLDQL)=ZSOLQB(JL,NCLDQS,NCLDQL)+ZRAINAUT(JL) + ELSE + ZSOLQB(JL,NCLDQR,NCLDQL)=ZSOLQB(JL,NCLDQR,NCLDQL)+ZRAINAUT(JL) + ENDIF + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Khairoutdinov and Kogan (2000) + !- + !-------------------------------------------------------- + ELSEIF (IWARMRAIN == 2) THEN + + IF (PLSM(JL) > 0.5_JPRB) THEN ! land + ZCONST = RCL_KK_CLOUD_NUM_LAND + ZLCRIT = RCLCRIT_LAND + ELSE ! ocean + ZCONST = RCL_KK_CLOUD_NUM_SEA + ZLCRIT = RCLCRIT_SEA + ENDIF + + IF (ZLIQCLD(JL) > ZLCRIT) THEN + + ZRAINAUT(JL) = 1.5_JPRB*ZA(JL,JK)*PTSPHY* & + & RCL_KKAau * ZLIQCLD(JL)**RCL_KKBauq * ZCONST**RCL_KKBaun + + ZRAINAUT(JL) = MIN(ZRAINAUT(JL),ZQXFG(JL,NCLDQL)) + IF (ZRAINAUT(JL) < ZEPSEC) ZRAINAUT(JL) = 0.0_JPRB + + ZRAINACC(JL) = 2.0_JPRB*ZA(JL,JK)*PTSPHY* & + & RCL_KKAac * (ZLIQCLD(JL)*ZRAINCLD(JL))**RCL_KKBac + + ZRAINACC(JL) = MIN(ZRAINACC(JL),ZQXFG(JL,NCLDQL)) + IF (ZRAINACC(JL) < ZEPSEC) ZRAINACC(JL) = 0.0_JPRB + + ELSE + ZRAINAUT(JL) = 0.0_JPRB + ZRAINACC(JL) = 0.0_JPRB + ENDIF + + ! If temperature < 0, then autoconversion produces snow rather than rain + ! Explicit + IF(ZTP1(JL,JK) <= RTT) THEN + ZSOLQA(JL,NCLDQS,NCLDQL)=ZSOLQA(JL,NCLDQS,NCLDQL)+ZRAINAUT(JL) + ZSOLQA(JL,NCLDQS,NCLDQL)=ZSOLQA(JL,NCLDQS,NCLDQL)+ZRAINACC(JL) + ZSOLQA(JL,NCLDQL,NCLDQS)=ZSOLQA(JL,NCLDQL,NCLDQS)-ZRAINAUT(JL) + ZSOLQA(JL,NCLDQL,NCLDQS)=ZSOLQA(JL,NCLDQL,NCLDQS)-ZRAINACC(JL) + ELSE + ZSOLQA(JL,NCLDQR,NCLDQL)=ZSOLQA(JL,NCLDQR,NCLDQL)+ZRAINAUT(JL) + ZSOLQA(JL,NCLDQR,NCLDQL)=ZSOLQA(JL,NCLDQR,NCLDQL)+ZRAINACC(JL) + ZSOLQA(JL,NCLDQL,NCLDQR)=ZSOLQA(JL,NCLDQL,NCLDQR)-ZRAINAUT(JL) + ZSOLQA(JL,NCLDQL,NCLDQR)=ZSOLQA(JL,NCLDQL,NCLDQR)-ZRAINACC(JL) + ENDIF + + ENDIF ! on IWARMRAIN + + ENDIF ! on ZLIQCLD > ZEPSEC + ENDDO + + + !---------------------------------------------------------------------- + ! RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + ! only active if T<0degC and supercooled liquid water is present + ! AND if not Sundquist autoconversion (as this includes riming) + !---------------------------------------------------------------------- + IF (IWARMRAIN > 1) THEN + + DO JL=KIDIA,KFDIA + IF(ZTP1(JL,JK) <= RTT .AND. ZLIQCLD(JL)>ZEPSEC) THEN + + ! Fallspeed air density correction + ZFALLCORR = (RDENSREF/ZRHO(JL))**0.4_JPRB + + !------------------------------------------------------------------ + ! Riming of snow by cloud water - implicit in lwc + !------------------------------------------------------------------ + IF (ZSNOWCLD(JL)>ZEPSEC .AND. ZCOVPTOT(JL)>0.01_JPRB) THEN + + ! Calculate riming term + ! Factor of liq water taken out because implicit + ZSNOWRIME(JL) = 0.3_JPRB*ZCOVPTOT(JL)*PTSPHY*RCL_CONST7S*ZFALLCORR & + & *(ZRHO(JL)*ZSNOWCLD(JL)*RCL_CONST1S)**RCL_CONST8S + + ! Limit snow riming term + ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + + ZSOLQB(JL,NCLDQS,NCLDQL) = ZSOLQB(JL,NCLDQS,NCLDQL) + ZSNOWRIME(JL) + + ENDIF + + !------------------------------------------------------------------ + ! Riming of ice by cloud water - implicit in lwc + ! NOT YET ACTIVE + !------------------------------------------------------------------ +! IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN +! +! ! Calculate riming term +! ! Factor of liq water taken out because implicit +! ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & +! & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S +! +! ! Limit ice riming term +! ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) +! +! ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) +! +! ENDIF + ENDIF + ENDDO + + ENDIF ! on IWARMRAIN > 1 + + + !---------------------------------------------------------------------- + ! 4.4a MELTING OF SNOW and ICE + ! with new implicit solver this also has to treat snow or ice + ! precipitating from the level above... i.e. local ice AND flux. + ! in situ ice and snow: could arise from LS advection or warming + ! falling ice and snow: arrives by precipitation process + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + ZICETOT(JL)=ZQXFG(JL,NCLDQI)+ZQXFG(JL,NCLDQS) + ZMELTMAX(JL) = 0.0_JPRB + + ! If there are frozen hydrometeors present and dry-bulb temperature > 0degC + IF(ZICETOT(JL) > ZEPSEC .AND. ZTP1(JL,JK) > RTT) THEN + + ! Calculate subsaturation + ZSUBSAT = MAX(ZQSICE(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB) + + ! Calculate difference between dry-bulb (ZTP1) and the temperature + ! at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + ! Melting only occurs if the wet-bulb temperature >0 + ! i.e. warming of ice particle due to melting > cooling + ! due to evaporation. + ZTDMTW0 = ZTP1(JL,JK)-RTT-ZSUBSAT* & + & (ZTW1+ZTW2*(PAP(JL,JK)-ZTW3)-ZTW4*(ZTP1(JL,JK)-ZTW5)) + ! Not implicit yet... + ! Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + ZCONS1 = ABS(PTSPHY*(1.0_JPRB+0.5_JPRB*ZTDMTW0)/RTAUMEL) + ZMELTMAX(JL) = MAX(ZTDMTW0*ZCONS1*ZRLDCP,0.0_JPRB) + ENDIF + ENDDO + + ! Loop over frozen hydrometeors (ice, snow) + DO JM=1,NCLV + IF (IPHASE(JM) == 2) THEN + JN = IMELT(JM) + DO JL=KIDIA,KFDIA + IF(ZMELTMAX(JL)>ZEPSEC .AND. ZICETOT(JL)>ZEPSEC) THEN + ! Apply melting in same proportion as frozen hydrometeor fractions + ZALFA = ZQXFG(JL,JM)/ZICETOT(JL) + ZMELT = MIN(ZQXFG(JL,JM),ZALFA*ZMELTMAX(JL)) + ! needed in first guess + ! This implies that zqpretot has to be recalculated below + ! since is not conserved here if ice falls and liquid doesn't + ZQXFG(JL,JM) = ZQXFG(JL,JM)-ZMELT + ZQXFG(JL,JN) = ZQXFG(JL,JN)+ZMELT + ZSOLQA(JL,JN,JM) = ZSOLQA(JL,JN,JM)+ZMELT + ZSOLQA(JL,JM,JN) = ZSOLQA(JL,JM,JN)-ZMELT + ENDIF + ENDDO + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.4b FREEZING of RAIN + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + ! If rain present + IF (ZQX(JL,JK,NCLDQR) > ZEPSEC) THEN + + IF (ZTP1(JL,JK) <= RTT .AND. ZTP1(JL,JK-1) > RTT) THEN + ! Base of melting layer/top of refreezing layer so + ! store rain/snow fraction for precip type diagnosis + ! If mostly rain, then supercooled rain slow to freeze + ! otherwise faster to freeze (snow or ice pellets) + ZQPRETOT(JL) = MAX(ZQX(JL,JK,NCLDQS)+ZQX(JL,JK,NCLDQR),ZEPSEC) + PRAINFRAC_TOPRFZ(JL) = ZQX(JL,JK,NCLDQR)/ZQPRETOT(JL) + IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN + LLRAINLIQ(JL) = .True. + ELSE + LLRAINLIQ(JL) = .False. + ENDIF + ENDIF + + ! If temperature less than zero + IF (ZTP1(JL,JK) < RTT) THEN + + IF (LLRAINLIQ(JL)) THEN + + ! Majority of raindrops completely melted + ! Refreezing is by slow heterogeneous freezing + + ! Slope of rain particle size distribution + ZLAMBDA = (RCL_FAC1/(ZRHO(JL)*ZQX(JL,JK,NCLDQR)))**RCL_FAC2 + + ! Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ZTEMP = RCL_FZRAB * (ZTP1(JL,JK)-RTT) + ZFRZ = PTSPHY * (RCL_CONST5R/ZRHO(JL)) * (EXP(ZTEMP)-1._JPRB) & + & * ZLAMBDA**RCL_CONST6R + ZFRZMAX(JL) = MAX(ZFRZ,0.0_JPRB) + + ELSE + + ! Majority of raindrops only partially melted + ! Refreeze with a shorter timescale (reverse of melting...for now) + + ZCONS1 = ABS(PTSPHY*(1.0_JPRB+0.5_JPRB*(RTT-ZTP1(JL,JK)))/RTAUMEL) + ZFRZMAX(JL) = MAX((RTT-ZTP1(JL,JK))*ZCONS1*ZRLDCP,0.0_JPRB) + + ENDIF + + IF(ZFRZMAX(JL)>ZEPSEC) THEN + ZFRZ = MIN(ZQX(JL,JK,NCLDQR),ZFRZMAX(JL)) + ZSOLQA(JL,NCLDQS,NCLDQR) = ZSOLQA(JL,NCLDQS,NCLDQR)+ZFRZ + ZSOLQA(JL,NCLDQR,NCLDQS) = ZSOLQA(JL,NCLDQR,NCLDQS)-ZFRZ + ENDIF + ENDIF + + ENDIF + + ENDDO + + !---------------------------------------------------------------------- + ! 4.4c FREEZING of LIQUID + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + ! not implicit yet... + ZFRZMAX(JL)=MAX((RTHOMO-ZTP1(JL,JK))*ZRLDCP,0.0_JPRB) + ENDDO + + JM = NCLDQL + JN = IMELT(JM) + DO JL=KIDIA,KFDIA + IF(ZFRZMAX(JL)>ZEPSEC .AND. ZQXFG(JL,JM)>ZEPSEC) THEN + ZFRZ = MIN(ZQXFG(JL,JM),ZFRZMAX(JL)) + ZSOLQA(JL,JN,JM) = ZSOLQA(JL,JN,JM)+ZFRZ + ZSOLQA(JL,JM,JN) = ZSOLQA(JL,JM,JN)-ZFRZ + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.5 EVAPORATION OF RAIN/SNOW + !---------------------------------------------------------------------- + + !---------------------------------------- + ! Rain evaporation scheme from Sundquist + !---------------------------------------- + IF (IEVAPRAIN == 1) THEN + + ! Rain + + DO JL=KIDIA,KFDIA + + ZZRH=RPRECRHMAX+(1.0_JPRB-RPRECRHMAX)*ZCOVPMAX(JL)/MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZZRH=MIN(MAX(ZZRH,RPRECRHMAX),1.0_JPRB) + + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSLIQ(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + !--------------------------------------------- + ! humidity in moistest ZCOVPCLR part of domain + !--------------------------------------------- + ZQE=MAX(0.0_JPRB,MIN(ZQE,ZQSLIQ(JL,JK))) + LLO1=ZCOVPCLR(JL)>ZEPSEC .AND. & + & ZQXFG(JL,NCLDQR)>ZEPSEC .AND. & + & ZQE0.8) + !IF(ZSIGK > 0.8_JPRB) THEN + ! ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + !ENDIF + !ZZRH = MIN(ZRHC,ZZRH) + + ! Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + ZZRH = MIN(0.8_JPRB,ZZRH) + + ZQE=MAX(0.0_JPRB,MIN(ZQX(JL,JK,NCLDQV),ZQSLIQ(JL,JK))) + + LLO1=ZCOVPCLR(JL)>ZEPSEC .AND. & + & ZQXFG(JL,NCLDQR)>ZEPSEC .AND. & + & ZQEZEPSEC .AND. & + & ZQXFG(JL,NCLDQS)>ZEPSEC .AND. & + & ZQEZEPSEC .AND. & + & ZQX(JL,JK,NCLDQS)>ZEPSEC .AND. & + & ZQE SELF%IN_VARS_3D_REAL64(1)%PTR + SELF%PICRIT_AER => SELF%IN_VARS_3D_REAL64(2)%PTR + SELF%PRE_ICE => SELF%IN_VARS_3D_REAL64(3)%PTR + SELF%PCCN => SELF%IN_VARS_3D_REAL64(4)%PTR + SELF%PNICE => SELF%IN_VARS_3D_REAL64(5)%PTR + SELF%PT => SELF%IN_VARS_3D_REAL64(6)%PTR + SELF%PQ => SELF%IN_VARS_3D_REAL64(7)%PTR + SELF%PVFA => SELF%IN_VARS_3D_REAL64(8)%PTR + SELF%PVFL => SELF%IN_VARS_3D_REAL64(9)%PTR + SELF%PVFI => SELF%IN_VARS_3D_REAL64(10)%PTR + SELF%PDYNA => SELF%IN_VARS_3D_REAL64(11)%PTR + SELF%PDYNL => SELF%IN_VARS_3D_REAL64(12)%PTR + SELF%PDYNI => SELF%IN_VARS_3D_REAL64(13)%PTR + SELF%PHRSW => SELF%IN_VARS_3D_REAL64(14)%PTR + SELF%PHRLW => SELF%IN_VARS_3D_REAL64(15)%PTR + SELF%PVERVEL => SELF%IN_VARS_3D_REAL64(16)%PTR + SELF%PAP => SELF%IN_VARS_3D_REAL64(17)%PTR + SELF%PLU => SELF%IN_VARS_3D_REAL64(18)%PTR + SELF%PLUDE => SELF%IN_VARS_3D_REAL64(19)%PTR + SELF%PSNDE => SELF%IN_VARS_3D_REAL64(20)%PTR + SELF%PMFU => SELF%IN_VARS_3D_REAL64(21)%PTR + SELF%PMFD => SELF%IN_VARS_3D_REAL64(22)%PTR + SELF%PA => SELF%IN_VARS_3D_REAL64(23)%PTR + SELF%PSUPSAT => SELF%IN_VARS_3D_REAL64(24)%PTR + + FIELD = FSET%FIELD("PLSM") + CALL FIELD%DATA(SELF%PLSM) + FIELD = FSET%FIELD("LDCUM") + CALL FIELD%DATA(SELF%LDCUM) + FIELD = FSET%FIELD("KTYPE") + CALL FIELD%DATA(SELF%KTYPE) + FIELD = FSET%FIELD("PAPH") + CALL FIELD%DATA(SELF%PAPH) + FIELD = FSET%FIELD("PEXTRA") + CALL FIELD%DATA(SELF%PEXTRA) + FIELD = FSET%FIELD("PCLV") + CALL FIELD%DATA(SELF%PCLV) + + DO IVAR = 1, SIZE(IN_VAR_NAMES) + CALL LOADVAR_ATLAS(FSET, TRIM(IN_VAR_NAMES(IVAR)), KLON, NGPTOTG) + ENDDO + + FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_CML', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) + CALL FIELD%DATA(SELF%B_CML) + CALL FSET%ADD(FIELD) + FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_TMP', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) + CALL FIELD%DATA(SELF%B_TMP) + CALL FSET%ADD(FIELD) + FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) + CALL FIELD%DATA(SELF%B_LOC) + CALL FSET%ADD(FIELD) + + ! The STATE_TYPE arrays are tricky, as the AOSOA layout needs to be expictly + ! unrolled at every step, and we rely on dirty hackery to do this. + CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_CML', SELF%TENDENCY_CML, KLON, NGPTOTG) + CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_TMP', SELF%TENDENCY_TMP, KLON, NGPTOTG) + ALLOCATE(SELF%TENDENCY_LOC(SELF%NBLOCKS)) + !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) + DO B=1, SELF%NBLOCKS + SELF%TENDENCY_LOC(B)%T => SELF%B_LOC(:,:,1,B) + SELF%TENDENCY_LOC(B)%A => SELF%B_LOC(:,:,2,B) + SELF%TENDENCY_LOC(B)%Q => SELF%B_LOC(:,:,3,B) + SELF%TENDENCY_LOC(B)%CLD => SELF%B_LOC(:,:,4:,B) + END DO + !$OMP END PARALLEL DO + + ! Output fields are simply allocated and zero'd + DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 2 + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) + ENDDO + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCOVPTOT", KIND=ATLAS_REAL(JPRB))) + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PRAINFRAC_TOPRFZ", KIND=ATLAS_REAL(JPRB), LEVELS=0)) + + DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 1 + FIELD = FSET%FIELD(TRIM(OUT_VAR_NAMES(IVAR))) + CALL FIELD%DATA(SELF%OUT_VARS_3D_REAL64(IVAR)%PTR) + !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) + DO B=1, SELF%NBLOCKS + SELF%OUT_VARS_3D_REAL64(IVAR)%PTR(:,:,B) = 0.0_JPRB + END DO + !$omp end parallel do + ENDDO + + SELF%PFSQLF => SELF%OUT_VARS_3D_REAL64(1)%PTR + SELF%PFSQIF => SELF%OUT_VARS_3D_REAL64(2)%PTR + SELF%PFCQLNG => SELF%OUT_VARS_3D_REAL64(3)%PTR + SELF%PFCQNNG => SELF%OUT_VARS_3D_REAL64(4)%PTR + SELF%PFSQRF => SELF%OUT_VARS_3D_REAL64(5)%PTR + SELF%PFSQSF => SELF%OUT_VARS_3D_REAL64(6)%PTR + SELF%PFCQRNG => SELF%OUT_VARS_3D_REAL64(7)%PTR + SELF%PFCQSNG => SELF%OUT_VARS_3D_REAL64(8)%PTR + SELF%PFSQLTUR => SELF%OUT_VARS_3D_REAL64(9)%PTR + SELF%PFSQITUR => SELF%OUT_VARS_3D_REAL64(10)%PTR + SELF%PFPLSL => SELF%OUT_VARS_3D_REAL64(11)%PTR + SELF%PFPLSN => SELF%OUT_VARS_3D_REAL64(12)%PTR + SELF%PFHPSL => SELF%OUT_VARS_3D_REAL64(13)%PTR + SELF%PFHPSN => SELF%OUT_VARS_3D_REAL64(14)%PTR + SELF%PCOVPTOT => SELF%OUT_VARS_3D_REAL64(15)%PTR + + FIELD = FSET%FIELD("PRAINFRAC_TOPRFZ") + CALL FIELD%DATA(SELF%PRAINFRAC_TOPRFZ) + !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) + DO B=1, SELF%NBLOCKS + SELF%PRAINFRAC_TOPRFZ(:,B) = 0.0_JPRB + END DO + !$OMP END PARALLEL DO + + ! Initialize global parameters from the input file + CALL LOAD_SCALAR('PTSPHY', SELF%PTSPHY) + CALL LOAD_SCALAR('LDSLPHY', SELF%LDSLPHY) + CALL LOAD_SCALAR('LDMAINCALL', SELF%LDMAINCALL) + CALL YOMCST_LOAD_PARAMETERS() + CALL YOETHF_LOAD_PARAMETERS() + CALL YRECLDP_LOAD_PARAMETERS() + CALL YREPHLI_LOAD_PARAMETERS() + + CALL INPUT_FINALIZE() + + END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD + + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, NGPTOT, NGPTOTG) + ! Validate the correctness of output against reference data + CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF + INTEGER(KIND=JPIM), INTENT(IN) :: NGPTOT + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG + + INTEGER(KIND=JPIM) :: KLON, IVAR + + CALL INPUT_INITIALIZE(NAME='reference') + CALL LOAD_SCALAR('KLON', KLON) + CALL INPUT_FINALIZE() + + ! Write variable validation header + IF (IRANK == 0) THEN + print '(1X,A20,1X,A3,5(1X,A20))', & + & 'Variable','Dim', 'MinValue','MaxValue','AbsMaxErr','AvgAbsErr/GP','MaxRelErr-%' + END IF + + + ! Actual variable validation + CALL VALIDATEVAR_ATLAS(FSET, 'PLUDE', KLON, NGPTOTG) + DO IVAR = 1, SIZE(OUT_VAR_NAMES) + CALL VALIDATEVAR_ATLAS(FSET, OUT_VAR_NAMES(IVAR), KLON, NGPTOTG) + ENDDO + CALL VALIDATESTATE_ATLAS(FSET, 'TENDENCY_LOC', KLON, NGPTOTG) + + END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE + +END MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 new file mode 100644 index 00000000..2504766c --- /dev/null +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -0,0 +1,105 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +PROGRAM DWARF_CLOUDSC + +USE PARKIND1, ONLY: JPIM, JPIB +USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK +USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE +USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER +USE EC_PMON_MOD, ONLY: EC_PMON + +USE ATLAS_MODULE +USE, INTRINSIC :: ISO_C_BINDING + +IMPLICIT NONE + +CHARACTER(LEN=20) :: CLARG +INTEGER(KIND=JPIM) :: IARGS, LENARG, JARG, I + +INTEGER(KIND=JPIM) :: NUMOMP = 1 ! Number of OpenMP threads for this run +INTEGER(KIND=JPIM) :: NGPTOTG = 16384 ! Number of grid points (as read from command line) +INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) +INTEGER(KIND=JPIM) :: NGPTOT ! Local number of grid points + +TYPE(CLOUDSC_GLOBAL_ATLAS_STATE) :: GLOBAL_ATLAS_STATE + +INTEGER(KIND=JPIB) :: ENERGY, POWER +CHARACTER(LEN=1) :: CLEC_PMON + +CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON) +IF (CLEC_PMON == '1') THEN + CALL EC_PMON(ENERGY, POWER) + print *, "EC_PMON:: Initial (idle) power: ", POWER +END IF + +IARGS = COMMAND_ARGUMENT_COUNT() + +! Get the number of OpenMP threads to use for the benchmark +if (IARGS >= 1) then + CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NUMOMP +end if + +! Initialize MPI environment +CALL ATLAS_LIBRARY%INITIALISE() +CALL CLOUDSC_MPI_INIT(NUMOMP) + +! Get total number of grid points (NGPTOT) with which to run the benchmark +IF (IARGS >= 2) THEN + CALL GET_COMMAND_ARGUMENT(2, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NGPTOTG +END IF + +! Determine local number of grid points +NGPTOT = (NGPTOTG - 1) / NUMPROC + 1 +if (IRANK == NUMPROC - 1) then + NGPTOT = NGPTOTG - (NUMPROC - 1) * NGPTOT +end if + +! Get the block size (NPROMA) for which to run the benchmark +IF (IARGS >= 3) THEN + CALL GET_COMMAND_ARGUMENT(3, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NPROMA +ENDIF + +! TODO: Create a global global memory state from serialized input data +CALL GLOBAL_ATLAS_STATE%LOAD(NPROMA, NGPTOT, NGPTOTG) + +! Call the driver to perform the parallel loop over our kernel +CALL CLOUDSC_DRIVER(NUMOMP, NPROMA, GLOBAL_ATLAS_STATE%KLEV, NGPTOT, NGPTOTG, & + & GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY, & + & GLOBAL_ATLAS_STATE%PT, GLOBAL_ATLAS_STATE%PQ, & + & GLOBAL_ATLAS_STATE%TENDENCY_CML, GLOBAL_ATLAS_STATE%TENDENCY_TMP, GLOBAL_ATLAS_STATE%TENDENCY_LOC, & + & GLOBAL_ATLAS_STATE%PVFA, GLOBAL_ATLAS_STATE%PVFL, GLOBAL_ATLAS_STATE%PVFI, & + & GLOBAL_ATLAS_STATE%PDYNA, GLOBAL_ATLAS_STATE%PDYNL, GLOBAL_ATLAS_STATE%PDYNI, & + & GLOBAL_ATLAS_STATE%PHRSW, GLOBAL_ATLAS_STATE%PHRLW, & + & GLOBAL_ATLAS_STATE%PVERVEL, GLOBAL_ATLAS_STATE%PAP, GLOBAL_ATLAS_STATE%PAPH, & + & GLOBAL_ATLAS_STATE%PLSM, GLOBAL_ATLAS_STATE%LDCUM, GLOBAL_ATLAS_STATE%KTYPE, & + & GLOBAL_ATLAS_STATE%PLU, GLOBAL_ATLAS_STATE%PLUDE, GLOBAL_ATLAS_STATE%PSNDE, & + & GLOBAL_ATLAS_STATE%PMFU, GLOBAL_ATLAS_STATE%PMFD, & + & GLOBAL_ATLAS_STATE%PA, GLOBAL_ATLAS_STATE%PCLV, GLOBAL_ATLAS_STATE%PSUPSAT,& + & GLOBAL_ATLAS_STATE%PLCRIT_AER, GLOBAL_ATLAS_STATE%PICRIT_AER, GLOBAL_ATLAS_STATE%PRE_ICE, & + & GLOBAL_ATLAS_STATE%PCCN, GLOBAL_ATLAS_STATE%PNICE,& + & GLOBAL_ATLAS_STATE%PCOVPTOT, GLOBAL_ATLAS_STATE%PRAINFRAC_TOPRFZ, & + & GLOBAL_ATLAS_STATE%PFSQLF, GLOBAL_ATLAS_STATE%PFSQIF , GLOBAL_ATLAS_STATE%PFCQNNG, GLOBAL_ATLAS_STATE%PFCQLNG, & + & GLOBAL_ATLAS_STATE%PFSQRF, GLOBAL_ATLAS_STATE%PFSQSF , GLOBAL_ATLAS_STATE%PFCQRNG, GLOBAL_ATLAS_STATE%PFCQSNG, & + & GLOBAL_ATLAS_STATE%PFSQLTUR, GLOBAL_ATLAS_STATE%PFSQITUR, & + & GLOBAL_ATLAS_STATE%PFPLSL, GLOBAL_ATLAS_STATE%PFPLSN, GLOBAL_ATLAS_STATE%PFHPSL, GLOBAL_ATLAS_STATE%PFHPSN & + & ) + +! Validate the output against serialized reference data +!CALL GLOBAL_ATLAS_STATE%VALIDATE(NGPTOT, NGPTOTG) + +CALL ATLAS_LIBRARY%FINALISE() + +! Tear down MPI environment +CALL CLOUDSC_MPI_END() + +END PROGRAM DWARF_CLOUDSC diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 new file mode 100644 index 00000000..1037f9c1 --- /dev/null +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -0,0 +1,151 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +module expand_atlas_mod + use atlas_module + use atlas_fieldset_module + use atlas_functionspace_blockstructuredcolumns_module + + use parkind1 , only : jpim, jprb + use yomphyder, only : state_type + + use cloudsc_mpi_mod, only : irank, numproc + use file_io_mod, only: input_initialize, load_scalar, load_array + use expand_mod, only: get_offsets, expand + + use, intrinsic :: iso_c_binding, only : c_int, c_double + + implicit none + +contains + + subroutine loadvar_atlas(fset, name, nlon, ngptotg) + ! Load into the local memory buffer and expand to global field + type(atlas_fieldset), intent(inout) :: fset + character(len=*), intent(in) :: name + integer(kind=jpim), intent(in) :: nlon + integer(kind=jpim), intent(in), optional :: ngptotg + + integer(kind=jpim) :: start, end, size, nlev, nproma, ngptot, nblocks, ndim, frank + type(atlas_field) :: field + real(kind=jprb), allocatable :: buffer_r1(:), buffer_r2(:,:), buffer_r3(:,:,:) + integer(kind=jpim), allocatable :: buffer_i1(:) + logical, allocatable :: buffer_l1(:) + real(c_double), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) + integer(c_int), pointer :: field_i1(:,:) + logical, pointer :: field_l1(:,:) + type(atlas_functionspace_blockstructuredcolumns) :: fspace + logical :: lfield, rfield, ifield + + field = fset%field(name) + frank = field%rank() + lfield = (name == "LDCUM") + ifield = (name == "KTYPE") + rfield = ((.not. lfield) .and. (.not. ifield)) + + fspace = field%functionspace() + nlev = field%levels() + nproma = fspace%block_size(1) + ngptot = fspace%size() + nblocks = fspace%nblks() + + if (frank == 2) then + call get_offsets(start, end, size, nlon, 1, 1, ngptot, ngptotg) + if (rfield) then + allocate(buffer_r1(size)) + call field%data(field_r1) + call load_array(name, start, end, size, nlon, buffer_r1) + call expand(buffer_r1, field_r1, size, nproma, ngptot, nblocks) + deallocate(buffer_r1) + else if (lfield) then + allocate(buffer_l1(size)) + call field%data(field_l1) + call load_array(name, start, end, size, nlon, buffer_l1) + call expand(buffer_l1, field_l1, size, nproma, ngptot, nblocks) + deallocate(buffer_l1) + else + allocate(buffer_i1(size)) + call field%data(field_i1) + call load_array(name, start, end, size, nlon, buffer_i1) + call expand(buffer_i1, field_i1, size, nproma, ngptot, nblocks) + deallocate(buffer_i1) + endif + else if (frank == 3) then + call get_offsets(start, end, size, nlon, 1, nlev, ngptot, ngptotg) + if (rfield) then + call field%data(field_r2) + allocate(buffer_r2(size, nlev)) + call load_array(name, start, end, size, nlon, nlev, buffer_r2) + call expand(buffer_r2, field_r2, size, nproma, nlev, ngptot, nblocks) + deallocate(buffer_r2) + endif + else if (frank == 4) then + ndim = field%shape(3) + call get_offsets(start, end, size, nlon, ndim, nlev, ngptot, ngptotg) + if (rfield) then + call field%data(field_r3) + allocate(buffer_r3(size, nlev, ndim)) + call load_array(name, start, end, size, nlon, nlev, ndim, buffer_r3) + call expand(buffer_r3, field_r3, size, nproma, nlev, ndim, ngptot, nblocks) + deallocate(buffer_r3) + endif + endif + end subroutine loadvar_atlas + + subroutine loadstate_atlas(fset, name, state, nlon, ngptotg) + ! Load into the local memory buffer and expand to global field + type(atlas_fieldset), intent(inout) :: fset + character(len=*) :: name + type(state_type), allocatable, intent(inout) :: state(:) + integer(kind=jpim), intent(in) :: nlon + integer(kind=jpim), intent(in), optional :: ngptotg + + integer :: b + integer(kind=jpim) :: start, end, size, nlev, nproma, ngptot, nblocks, ndim + type(atlas_field) :: field + type(atlas_functionspace_blockstructuredcolumns) :: fspace + + real(kind=jprb), allocatable :: buffer(:,:,:) + real(c_double), pointer :: field_r3(:,:,:,:) + + field = fset%field(name) + fspace = field%functionspace() + nlev = field%levels() + nproma = fspace%block_size(1) + ngptot = fspace%size() + nblocks = fspace%nblks() + ndim = field%shape(3) - 3 + + call get_offsets(start, end, size, nlon, ndim, nlev, ngptot, ngptotg) + allocate(buffer(size, nlev, 3+ndim)) + if (.not. allocated(state)) allocate(state(nblocks)) + call field%data(field_r3) + + call load_array(name//'_T', start, end, size, nlon, nlev, buffer(:,:,1)) + call load_array(name//'_A', start, end, size, nlon, nlev, buffer(:,:,2)) + call load_array(name//'_Q', start, end, size, nlon, nlev, buffer(:,:,3)) + call load_array(name//'_CLD', start, end, size, nlon, nlev, ndim, buffer(:,:,4:)) + + call expand(buffer(:,:,1), field_r3(:,:,1,:), size, nproma, nlev, ngptot, nblocks) + call expand(buffer(:,:,2), field_r3(:,:,2,:), size, nproma, nlev, ngptot, nblocks) + call expand(buffer(:,:,3), field_r3(:,:,3,:), size, nproma, nlev, ngptot, nblocks) + call expand(buffer(:,:,4:), field_r3(:,:,4:,:), size, nproma, nlev, ndim, ngptot, nblocks) + deallocate(buffer) + +!$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) + do b=1, nblocks + state(b)%t => field_r3(:,:,1,b) + state(b)%a => field_r3(:,:,2,b) + state(b)%q => field_r3(:,:,3,b) + state(b)%cld => field_r3(:,:,4:3+ndim,b) + end do +!$OMP end parallel do + end subroutine loadstate_atlas + +end module expand_atlas_mod diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 new file mode 100644 index 00000000..32365859 --- /dev/null +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -0,0 +1,182 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE VALIDATE_ATLAS_MOD + USE PARKIND1, ONLY: JPIM, JPRB + USE CLOUDSC_MPI_MOD + USE VALIDATE_MOD, ONLY: VALIDATE, ERROR_PRINT + + USE ATLAS_MODULE + USE ATLAS_FIELDSET_MODULE + USE ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS_MODULE + USE, INTRINSIC :: ISO_C_BINDING + USE EXPAND_MOD, ONLY: LOAD_AND_EXPAND + USE FILE_IO_MOD, ONLY: INPUT_INITIALIZE, INPUT_FINALIZE + + IMPLICIT NONE + +CONTAINS + + SUBROUTINE VALIDATESTATE_ATLAS(FSET, NAME, NLON, NGPTOTG) + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + CHARACTER(*), INTENT(IN) :: NAME + INTEGER(KIND=JPIM), INTENT(IN) :: NLON + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG + + CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "A") + CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "Q") + CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "T") + CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "CLD") + END SUBROUTINE VALIDATESTATE_ATLAS + + SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) + ! Computes and prints errors "in the L1 norm sense" + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + CHARACTER(*), INTENT(IN) :: NAME + INTEGER(KIND=JPIM), INTENT(IN) :: NLON + INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG + CHARACTER(*), INTENT(IN), OPTIONAL :: STATE_VAR + + REAL(KIND=JPRB), ALLOCATABLE :: REF_R2(:,:), REF_R3(:,:,:), REF_R4(:,:,:,:) + REAL(C_DOUBLE), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE + TYPE(ATLAS_FIELD) :: FIELD + INTEGER :: B, BSIZE, JL, JK, JM + REAL(KIND=JPRB) :: ZMINVAL(1), ZMAX_VAL_ERR(2), ZDIFF, ZSUM_ERR_ABS(2), ZRELERR, ZAVGPGP + INTEGER :: FRANK, NBLOCKS, NLEV, NGPTOT, NPROMA, VAR_ID, NDIM + CHARACTER(LEN=256) :: FULLNAME + + IF (PRESENT(STATE_VAR)) THEN + FULLNAME = NAME//'_'//STATE_VAR + ELSE + FULLNAME = NAME + ENDIF + + FIELD = FSET%FIELD(NAME) + FRANK = FIELD%RANK() + FSPACE = FIELD%FUNCTIONSPACE() + NLEV = FIELD%LEVELS() + NGPTOT = FSPACE%SIZE() + NBLOCKS = FSPACE%NBLKS() + NPROMA = FSPACE%BLOCK_SIZE(1) + + ZMINVAL(1) = +HUGE(ZMINVAL(1)) + ZMAX_VAL_ERR(1) = -HUGE(ZMAX_VAL_ERR(1)) + ZMAX_VAL_ERR(2) = 0.0_JPRB + ZSUM_ERR_ABS(:) = 0.0_JPRB + + CALL INPUT_INITIALIZE(NAME='reference') + IF (FRANK == 2) THEN + CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + CALL FIELD%DATA(FIELD_R1) + !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & + !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) + DO B=1, NBLOCKS + BSIZE = FSPACE%BLOCK_SIZE(B) + ZMINVAL(1) = MIN(ZMINVAL(1),MINVAL(FIELD_R1(:,B))) + ZMAX_VAL_ERR(1) = MAX(ZMAX_VAL_ERR(1),MAXVAL(FIELD_R1(:,B))) + DO JK=1, BSIZE + ! Difference against reference result in one-norm sense + ZDIFF = ABS(FIELD_R1(JK,B) - REF_R2(JK,B)) + ZMAX_VAL_ERR(2) = MAX(ZMAX_VAL_ERR(2),ZDIFF) + ZSUM_ERR_ABS(1) = ZSUM_ERR_ABS(1) + ZDIFF + ZSUM_ERR_ABS(2) = ZSUM_ERR_ABS(2) + ABS(REF_R2(JK,B)) + ENDDO + END DO + ELSE IF (FRANK == 3) THEN + CALL LOAD_AND_EXPAND(NAME, REF_R3, NLON, FIELD%LEVELS(), NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + CALL FIELD%DATA(FIELD_R2) + !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & + !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) + DO B=1, NBLOCKS + BSIZE = FSPACE%BLOCK_SIZE(B) + ZMINVAL(1) = MIN(ZMINVAL(1),MINVAL(FIELD_R2(:,:,B))) + ZMAX_VAL_ERR(1) = MAX(ZMAX_VAL_ERR(1),MAXVAL(FIELD_R2(:,:,B))) + DO JL=1, NLEV + DO JK=1, BSIZE + ! Difference against reference result in one-norm sense + ZDIFF = ABS(FIELD_R2(JK,JL,B) - REF_R3(JK,JL,B)) + ZMAX_VAL_ERR(2) = MAX(ZMAX_VAL_ERR(2),ZDIFF) + ZSUM_ERR_ABS(1) = ZSUM_ERR_ABS(1) + ZDIFF + ZSUM_ERR_ABS(2) = ZSUM_ERR_ABS(2) + ABS(REF_R3(JK,JL,B)) + ENDDO + END DO + END DO + ELSE IF (FRANK == 4 .AND. PRESENT(STATE_VAR)) THEN + CALL FIELD%DATA(FIELD_R3) + NDIM = FIELD%SHAPE(3) - 3 + IF (STATE_VAR /= 'CLD') THEN + VAR_ID = 1 + IF (STATE_VAR == 'A') THEN + VAR_ID = 2 + ENDIF + IF (STATE_VAR == 'Q') THEN + VAR_ID = 3 + ENDIF + CALL LOAD_AND_EXPAND(NAME//'_'//STATE_VAR, REF_R3, NLON, NLEV, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & + !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) + DO B=1, NBLOCKS + BSIZE = FSPACE%BLOCK_SIZE(B) + ZMINVAL(1) = MIN(ZMINVAL(1),MINVAL(FIELD_R3(:,:,VAR_ID,B))) + ZMAX_VAL_ERR(1) = MAX(ZMAX_VAL_ERR(1),MAXVAL(FIELD_R3(:,:,VAR_ID,B))) + DO JL=1, NLEV + DO JK=1, BSIZE + ! Difference against reference result in one-norm sense + ZDIFF = ABS(FIELD_R3(JK,JL,VAR_ID,B) - REF_R3(JK,JL,B)) + ZMAX_VAL_ERR(2) = MAX(ZMAX_VAL_ERR(2),ZDIFF) + ZSUM_ERR_ABS(1) = ZSUM_ERR_ABS(1) + ZDIFF + ZSUM_ERR_ABS(2) = ZSUM_ERR_ABS(2) + ABS(REF_R3(JK,JL,B)) + ENDDO + END DO + END DO + ELSE IF (STATE_VAR == 'CLD') THEN + CALL LOAD_AND_EXPAND(NAME//'_CLD', REF_R4, NLON, NLEV, NDIM, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & + !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) + DO B=1, NBLOCKS + BSIZE = MIN(NLON, NGPTOT - (B-1)*NLON) ! Field block size + ZMINVAL(1) = MIN(ZMINVAL(1),MINVAL(FIELD_R3(:,:,4:,B))) + ZMAX_VAL_ERR(1) = MAX(ZMAX_VAL_ERR(1),MAXVAL(FIELD_R3(:,:,4:,B))) + DO JM=1, NDIM + DO JL=1, NLEV + DO JK=1, BSIZE + ! Difference against reference result in one-norm sense + ZDIFF = ABS(FIELD_R3(JK,JL,3+JM,B) - REF_R4(JK,JL,JM,B)) + ZMAX_VAL_ERR(2) = MAX(ZMAX_VAL_ERR(2),ZDIFF) + ZSUM_ERR_ABS(1) = ZSUM_ERR_ABS(1) + ZDIFF + ZSUM_ERR_ABS(2) = ZSUM_ERR_ABS(2) + ABS(REF_R4(JK,JL,JM,B)) + ENDDO + ENDDO + END DO + END DO + ENDIF + ELSE + PRINT *, "FIELD RANK NOT SUPPORTED" + CALL EXIT(1) + ENDIF + CALL INPUT_FINALIZE() + + CALL CLOUDSC_MPI_REDUCE_MIN(ZMINVAL, 1, 0) + CALL CLOUDSC_MPI_REDUCE_MAX(ZMAX_VAL_ERR, 2, 0) + CALL CLOUDSC_MPI_REDUCE_SUM(ZSUM_ERR_ABS, 2, 0) + + IF (PRESENT(NGPTOTG)) THEN + ZAVGPGP = ZSUM_ERR_ABS(1) / REAL(NGPTOTG,JPRB) + ELSE + ZAVGPGP = ZSUM_ERR_ABS(1) / REAL(NGPTOT,JPRB) + END IF + + IF (IRANK == 0) THEN + CALL ERROR_PRINT(FULLNAME, ZMINVAL(1), ZMAX_VAL_ERR(1), ZMAX_VAL_ERR(2), & + & ZSUM_ERR_ABS(1), ZSUM_ERR_ABS(2), ZAVGPGP, NDIM=FRANK-1) + END IF + END SUBROUTINE VALIDATEVAR_ATLAS + +END MODULE VALIDATE_ATLAS_MOD From 49d7222db82eda3d4d4339c64d3aaed641fe0ba1 Mon Sep 17 00:00:00 2001 From: Zbigniew Piotrowski Date: Wed, 15 Mar 2023 12:27:37 +0000 Subject: [PATCH 010/174] Removal of Volta from Readme, deleting volta benchmark arch file --- README.md | 10 +++---- .../arch/volta/nvhpc/22.3/include_arch.yml | 27 ------------------- 2 files changed, 5 insertions(+), 32 deletions(-) delete mode 100644 benchmark/arch/volta/nvhpc/22.3/include_arch.yml diff --git a/README.md b/README.md index a9e9d59b..04cdea98 100644 --- a/README.md +++ b/README.md @@ -123,22 +123,22 @@ has proven difficult with certain compiler toolchains. ### GPU versions of CLOUDSC The GPU-enabled versions of the dwarf are by default disabled. To -enable them use the `--with-gpu` flag. For example to build on the in-house -volta machine: +enable them use the `--with-gpu` flag. For example to build on the ECMWF's ATOS +A100 nodes: ```sh ./cloudsc-bundle create # Checks out dependency packages -./cloudsc-bundle build --clean --with-gpu --arch=./arch/ecmwf/volta/nvhpc/20.9 +./cloudsc-bundle build --clean --with-gpu --arch=./arch/ecmwf/hpc2020/nvhpc/22.1 ``` ### MPI-enabled versions of CLOUDSC Optionally, dwarf-cloudsc-fortran and the GPU versions can be built with -MPI support by providing the `--with-mpi` flag. For example on volta: +MPI support by providing the `--with-mpi` flag. For example on ATOS: ```sh ./cloudsc-bundle create -./cloudsc-bundle build --clean --with-mpi --with-gpu --arch=./arch/ecmwf/volta/nvhpc/20.9 +./cloudsc-bundle build --clean --with-mpi --with-gpu --arch=./arch/ecmwf/hpc2020/nvhpc/22.1 ``` Running with MPI parallelization distributes the columns of the working set diff --git a/benchmark/arch/volta/nvhpc/22.3/include_arch.yml b/benchmark/arch/volta/nvhpc/22.3/include_arch.yml deleted file mode 100644 index e24cc355..00000000 --- a/benchmark/arch/volta/nvhpc/22.3/include_arch.yml +++ /dev/null @@ -1,27 +0,0 @@ -parameterset: - # System architecture specification - - name: arch_set - init_with: include/include_arch.yml - parameter: - # Architecture definition to pass to bundle build command - - {name: arch, _: "arch/ecmwf/volta/nvhpc/22.3"} # Choose from arch - - # Number of NUMA domains on a node (e.g., the number of sockets) - - {name: numa_domains, type: int, _: 2} - - # Number of cores per NUMA domain (e.g., number of cores per CPU) - - {name: cores_per_numa_domain, type: int, _: 8} - - # Number of GPUs available on a node - - {name: gpus, type: int, _: 2} - - # Set CUDA runtime heap size on GPU for SCC variant - - {name: PGI_ACC_CUDA_HEAPSIZE, export: true, _: 12G} - - # MPI launch command to use (inject CUDA_VISIBLE_DEVICES) - - name: launch_cmd - mode: python - _: "'mpirun -n ${{ NPROC }} --cpus-per-proc ${{ NUMOMP }} bash -c \"CUDA_VISIBLE_DEVICES=\\${OMPI_COMM_WORLD_RANK}' if $mpi == 1 else ''" - - name: launch_cmd_end - mode: python - _: "'\"' if $mpi == 1 else ''" From 3cd90da521a860b263a2c83bf33319266518c1cd Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 17 Mar 2023 09:40:37 +0000 Subject: [PATCH 011/174] manual OpenACC k-caching variant --- README.md | 15 +- bundle.yml | 1 + src/cloudsc_gpu/CMakeLists.txt | 25 + .../cloudsc_driver_gpu_scc_k_caching_mod.F90 | 189 ++ .../cloudsc_gpu_scc_k_caching_mod.F90 | 2641 +++++++++++++++++ src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 | 31 + 6 files changed, 2896 insertions(+), 6 deletions(-) create mode 100644 src/cloudsc_gpu/cloudsc_driver_gpu_scc_k_caching_mod.F90 create mode 100644 src/cloudsc_gpu/cloudsc_gpu_scc_k_caching_mod.F90 diff --git a/README.md b/README.md index 04cdea98..f2d8dc2c 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,9 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) The block array arguments are fully dimensioned though, and multi-dimensional temporaries have been declared explicitly at the driver level. +- **dwarf-cloudsc-gpu-scc-k-caching**: GPU-enabled and further + optimized version of CLOUDSC that also uses the SCC loop layout in + combination with loop fusion and temporary local array demotion. - **dwarf-cloudsc-gpu-scc-cuf**: GPU-enabled and optimized version of CLOUDSC that uses the SCC loop layout in combination with CUDA-Fortran (CUF) to explicitly allocate temporary arrays in device memory and @@ -69,12 +72,12 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) - **CUDA C prototypes**: To enable these variants, a suitable CUDA installation is required and the `--with-cuda` flag needs to be pased at the build stage. - - **dwarf-cloudsc-cuda**: GPU-enabled, CUDA C version of CLOUDSC. - - **dwarf-cloudsc-cuda-hoist**: GPU-enabled, optimized CUDA C version - of CLOUDSC including host side hoisted temporary local variables. - - **dwarf-cloudsc-cuda-k-caching**: GPU-enabled, further optimized CUDA - C version of CLOUDSC including loop fusion and temporary local - array demotion. + - **dwarf-cloudsc-cuda**: GPU-enabled, CUDA C version of CLOUDSC. + - **dwarf-cloudsc-cuda-hoist**: GPU-enabled, optimized CUDA C version + of CLOUDSC including host side hoisted temporary local variables. + - **dwarf-cloudsc-cuda-k-caching**: GPU-enabled, further optimized CUDA + C version of CLOUDSC including loop fusion and temporary local + array demotion. - **dwarf-cloudsc-gpu-scc-field**: GPU-enabled and optimized version of CLOUDSC that uses the SCC loop layout, and a dedicated Fortran FIELD API to manage device offload and copyback. The intent is to demonstrate diff --git a/bundle.yml b/bundle.yml index ef5a8640..f69a79dc 100644 --- a/bundle.yml +++ b/bundle.yml @@ -57,6 +57,7 @@ options : cmake : > ENABLE_CLOUDSC_GPU_SCC=ON ENABLE_CLOUDSC_GPU_SCC_HOIST=ON + ENABLE_CLOUDSC_GPU_SCC_K_CACHING=ON ENABLE_CLOUDSC_GPU_OMP_SCC_HOIST=ON - with-cuda : diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt index b4a482c2..95c06fad 100644 --- a/src/cloudsc_gpu/CMakeLists.txt +++ b/src/cloudsc_gpu/CMakeLists.txt @@ -38,6 +38,11 @@ ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC_HOIST CONDITION Serialbox_FOUND OR HDF5_FOUND ) +ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC_K_CACHING + DESCRIPTION "Build (further) optimized GPU version of CLOUDSC using SCC layout with OpenACC" DEFAULT OFF + CONDITION Serialbox_FOUND OR HDF5_FOUND +) + ecbuild_add_option( FEATURE CLOUDSC_GPU_OMP_SCC_HOIST DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with hoisted temporary arrays and OpenMP offload" DEFAULT OFF CONDITION Serialbox_FOUND OR HDF5_FOUND @@ -116,6 +121,26 @@ if( HAVE_CLOUDSC_GPU_SCC_HOIST ) ) endif() +if( HAVE_CLOUDSC_GPU_SCC_K_CACHING ) + ecbuild_add_executable( + TARGET dwarf-cloudsc-gpu-scc-k-caching + SOURCES + dwarf_cloudsc_gpu.F90 + cloudsc_driver_gpu_scc_k_caching_mod.F90 + cloudsc_gpu_scc_k_caching_mod.F90 + LIBS + cloudsc-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_K_CACHING + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-gpu-scc-k-caching-serial + COMMAND bin/dwarf-cloudsc-gpu-scc-k-caching + ARGS 1 1000 128 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) +endif() if( HAVE_CLOUDSC_GPU_OMP_SCC_HOIST ) list( APPEND CLOUDSC_GPU_OMP_SCC_HOIST_DEFINITIONS CLOUDSC_GPU_OMP_SCC_HOIST ) diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_k_caching_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_k_caching_mod.F90 new file mode 100644 index 00000000..44a4edd8 --- /dev/null +++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_k_caching_mod.F90 @@ -0,0 +1,189 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE CLOUDSC_DRIVER_GPU_SCC_K_CACHING_MOD + + USE PARKIND1, ONLY: JPIM, JPRB + USE YOMPHYDER, ONLY: STATE_TYPE + USE YOECLDP, ONLY : NCLV, YRECLDP, TECLDP + USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK + USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM + + USE CLOUDSC_GPU_SCC_K_CACHING_MOD, ONLY: CLOUDSC_SCC_K_CACHING + + IMPLICIT NONE + +CONTAINS + + SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_K_CACHING( & + & NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, & + & PT, PQ, & + & BUFFER_CML, BUFFER_TMP, BUFFER_LOC, & + & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, & + & PHRSW, PHRLW, & + & PVERVEL, PAP, PAPH, & + & PLSM, LDCUM, KTYPE, & + & PLU, PLUDE, PSNDE, PMFU, PMFD, & + & PA, & + & PCLV, PSUPSAT,& + & PLCRIT_AER,PICRIT_AER, PRE_ICE, & + & PCCN, PNICE,& + & PCOVPTOT, PRAINFRAC_TOPRFZ, & + & PFSQLF, PFSQIF , PFCQNNG, PFCQLNG, & + & PFSQRF, PFSQSF , PFCQRNG, PFCQSNG, & + & PFSQLTUR, PFSQITUR, & + & PFPLSL, PFPLSN, PFHPSL, PFHPSN & + & ) + ! Driver routine that invokes the optimized CLAW-based CLOUDSC GPU kernel + + INTEGER(KIND=JPIM) :: JL + INTEGER(KIND=JPIM) :: NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG + INTEGER(KIND=JPIM) :: KFLDX + REAL(KIND=JPRB) :: PTSPHY ! Physics timestep + REAL(KIND=JPRB), INTENT(IN) :: PT(NPROMA, NLEV, NGPBLKS) ! T at start of callpar + REAL(KIND=JPRB), INTENT(IN) :: PQ(NPROMA, NLEV, NGPBLKS) ! Q at start of callpar + REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_CML(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_CML + REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_TMP(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_TMP + REAL(KIND=JPRB), INTENT(INOUT) :: BUFFER_LOC(NPROMA,NLEV,3+NCLV,NGPBLKS) ! Storage buffer for TENDENCY_LOC + REAL(KIND=JPRB), INTENT(IN) :: PVFA(NPROMA, NLEV, NGPBLKS) ! CC from VDF scheme + REAL(KIND=JPRB), INTENT(IN) :: PVFL(NPROMA, NLEV, NGPBLKS) ! Liq from VDF scheme + REAL(KIND=JPRB), INTENT(IN) :: PVFI(NPROMA, NLEV, NGPBLKS) ! Ice from VDF scheme + REAL(KIND=JPRB), INTENT(IN) :: PDYNA(NPROMA, NLEV, NGPBLKS) ! CC from Dynamics + REAL(KIND=JPRB), INTENT(IN) :: PDYNL(NPROMA, NLEV, NGPBLKS) ! Liq from Dynamics + REAL(KIND=JPRB), INTENT(IN) :: PDYNI(NPROMA, NLEV, NGPBLKS) ! Liq from Dynamics + REAL(KIND=JPRB), INTENT(IN) :: PHRSW(NPROMA, NLEV, NGPBLKS) ! Short-wave heating rate + REAL(KIND=JPRB), INTENT(IN) :: PHRLW(NPROMA, NLEV, NGPBLKS) ! Long-wave heating rate + REAL(KIND=JPRB), INTENT(IN) :: PVERVEL(NPROMA, NLEV, NGPBLKS) !Vertical velocity + REAL(KIND=JPRB), INTENT(IN) :: PAP(NPROMA, NLEV, NGPBLKS) ! Pressure on full levels + REAL(KIND=JPRB), INTENT(IN) :: PAPH(NPROMA, NLEV+1, NGPBLKS) ! Pressure on half levels + REAL(KIND=JPRB), INTENT(IN) :: PLSM(NPROMA, NGPBLKS) ! Land fraction (0-1) + LOGICAL, INTENT(IN) :: LDCUM(NPROMA, NGPBLKS) ! Convection active + INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(NPROMA, NGPBLKS) ! Convection type 0,1,2 + REAL(KIND=JPRB), INTENT(IN) :: PLU(NPROMA, NLEV, NGPBLKS) ! Conv. condensate + REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(NPROMA, NLEV, NGPBLKS) ! Conv. detrained water + REAL(KIND=JPRB), INTENT(IN) :: PSNDE(NPROMA, NLEV, NGPBLKS) ! Conv. detrained snow + REAL(KIND=JPRB), INTENT(IN) :: PMFU(NPROMA, NLEV, NGPBLKS) ! Conv. mass flux up + REAL(KIND=JPRB), INTENT(IN) :: PMFD(NPROMA, NLEV, NGPBLKS) ! Conv. mass flux down + REAL(KIND=JPRB), INTENT(IN) :: PA(NPROMA, NLEV, NGPBLKS) ! Original Cloud fraction (t) + REAL(KIND=JPRB), INTENT(IN) :: PCLV(NPROMA, NLEV, NCLV, NGPBLKS) + REAL(KIND=JPRB), INTENT(IN) :: PSUPSAT(NPROMA, NLEV, NGPBLKS) + REAL(KIND=JPRB), INTENT(IN) :: PLCRIT_AER(NPROMA, NLEV, NGPBLKS) + REAL(KIND=JPRB), INTENT(IN) :: PICRIT_AER(NPROMA, NLEV, NGPBLKS) + REAL(KIND=JPRB), INTENT(IN) :: PRE_ICE(NPROMA, NLEV, NGPBLKS) + REAL(KIND=JPRB), INTENT(IN) :: PCCN(NPROMA, NLEV, NGPBLKS) ! liquid cloud condensation nuclei + REAL(KIND=JPRB), INTENT(IN) :: PNICE(NPROMA, NLEV, NGPBLKS) ! ice number concentration (cf. CCN) + + REAL(KIND=JPRB), INTENT(INOUT) :: PCOVPTOT(NPROMA, NLEV, NGPBLKS) ! Precip fraction + REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(NPROMA, NGPBLKS) + ! Flux diagnostics for DDH budget + REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(NPROMA, NLEV+1, NGPBLKS) ! Flux of liquid + REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(NPROMA, NLEV+1, NGPBLKS) ! Flux of ice + REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(NPROMA, NLEV+1, NGPBLKS) ! -ve corr for liq + REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(NPROMA, NLEV+1, NGPBLKS) ! -ve corr for ice + REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(NPROMA, NLEV+1, NGPBLKS) ! Flux diagnostics + REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(NPROMA, NLEV+1, NGPBLKS) ! for DDH, generic + REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(NPROMA, NLEV+1, NGPBLKS) ! rain + REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(NPROMA, NLEV+1, NGPBLKS) ! snow + REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(NPROMA, NLEV+1, NGPBLKS) ! liquid flux due to VDF + REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(NPROMA, NLEV+1, NGPBLKS) ! ice flux due to VDF + REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(NPROMA, NLEV+1, NGPBLKS) ! liq+rain sedim flux + REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(NPROMA, NLEV+1, NGPBLKS) ! ice+snow sedim flux + REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(NPROMA, NLEV+1, NGPBLKS) ! Enthalpy flux for liq + REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(NPROMA, NLEV+1, NGPBLKS) ! ice number concentration (cf. CCN) + + INTEGER(KIND=JPIM) :: JKGLO,IBL,ICEND + TYPE(PERFORMANCE_TIMER) :: TIMER + INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1 + + ! Local copy of cloud parameters for offload + TYPE(TECLDP) :: LOCAL_YRECLDP + + NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) +1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) + if (irank == 0) then + write(0,1003) NUMPROC,NUMOMP,NGPTOTG,NPROMA,NGPBLKS + end if + + ! Global timer for the parallel region + CALL TIMER%START(NUMOMP) + + ! Workaround for PGI / OpenACC oddities: + ! Create a local copy of the parameter struct to ensure they get + ! moved to the device the in ``acc data`` clause below + LOCAL_YRECLDP = YRECLDP + +!$acc data & +!$acc copyin( & +!$acc pt,pq,buffer_cml,buffer_tmp,pvfa, & +!$acc pvfl,pvfi,pdyna,pdynl,pdyni,phrsw,phrlw,pvervel, & +!$acc pap,paph,plsm,ldcum,ktype,plu,psnde, & +!$acc pmfu,pmfd,pa,pclv,psupsat,plcrit_aer,picrit_aer, & +!$acc pre_ice,pccn,pnice, yrecldp) & +!$acc copy( & +!$acc buffer_loc,plude,pcovptot,prainfrac_toprfz) & +!$acc copyout( & +!$acc pfsqlf,pfsqif,pfcqnng, & +!$acc pfcqlng ,pfsqrf,pfsqsf,pfcqrng,pfcqsng,pfsqltur, & +!$acc pfsqitur,pfplsl,pfplsn,pfhpsl,pfhpsn) + + ! Local timer for each thread + TID = GET_THREAD_NUM() + CALL TIMER%THREAD_START(TID) + +!$acc parallel loop gang vector_length(NPROMA) + DO JKGLO=1,NGPTOT,NPROMA + IBL=(JKGLO-1)/NPROMA+1 + ICEND=MIN(NPROMA,NGPTOT-JKGLO+1) + + !$acc loop vector + DO JL=1,ICEND + CALL CLOUDSC_SCC_K_CACHING & + & (1, ICEND, NPROMA, NLEV, PTSPHY,& + & PT(:,:,IBL), PQ(:,:,IBL), & + & BUFFER_TMP(:,:,1,IBL), BUFFER_TMP(:,:,3,IBL), BUFFER_TMP(:,:,2,IBL), BUFFER_TMP(:,:,4:8,IBL), & + & BUFFER_LOC(:,:,1,IBL), BUFFER_LOC(:,:,3,IBL), BUFFER_LOC(:,:,2,IBL), BUFFER_LOC(:,:,4:8,IBL), & + & PVFA(:,:,IBL), PVFL(:,:,IBL), PVFI(:,:,IBL), PDYNA(:,:,IBL), PDYNL(:,:,IBL), PDYNI(:,:,IBL), & + & PHRSW(:,:,IBL), PHRLW(:,:,IBL),& + & PVERVEL(:,:,IBL), PAP(:,:,IBL), PAPH(:,:,IBL),& + & PLSM(:,IBL), LDCUM(:,IBL), KTYPE(:,IBL), & + & PLU(:,:,IBL), PLUDE(:,:,IBL), PSNDE(:,:,IBL), PMFU(:,:,IBL), PMFD(:,:,IBL),& + !---prognostic fields + & PA(:,:,IBL), PCLV(:,:,:,IBL), PSUPSAT(:,:,IBL),& + !-- arrays for aerosol-cloud interactions + & PLCRIT_AER(:,:,IBL),PICRIT_AER(:,:,IBL),& + & PRE_ICE(:,:,IBL),& + & PCCN(:,:,IBL), PNICE(:,:,IBL),& + !---diagnostic output + & PCOVPTOT(:,:,IBL), PRAINFRAC_TOPRFZ(:,IBL),& + !---resulting fluxes + & PFSQLF(:,:,IBL), PFSQIF (:,:,IBL), PFCQNNG(:,:,IBL), PFCQLNG(:,:,IBL),& + & PFSQRF(:,:,IBL), PFSQSF (:,:,IBL), PFCQRNG(:,:,IBL), PFCQSNG(:,:,IBL),& + & PFSQLTUR(:,:,IBL), PFSQITUR (:,:,IBL), & + & PFPLSL(:,:,IBL), PFPLSN(:,:,IBL), PFHPSL(:,:,IBL), PFHPSN(:,:,IBL),& + & YRECLDP=LOCAL_YRECLDP, JL=JL) + ENDDO + ENDDO +!$acc end parallel loop + + CALL TIMER%THREAD_END(TID) + +!$acc end data + + CALL TIMER%END() + + ! On GPUs, adding block-level column totals is cumbersome and + ! error prone, and of little value due to the large number of + ! processing "thread teams". Instead we register the total here. + CALL TIMER%THREAD_LOG(TID=TID, IGPC=NGPTOT) + + CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT) + + END SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_K_CACHING + +END MODULE CLOUDSC_DRIVER_GPU_SCC_K_CACHING_MOD diff --git a/src/cloudsc_gpu/cloudsc_gpu_scc_k_caching_mod.F90 b/src/cloudsc_gpu/cloudsc_gpu_scc_k_caching_mod.F90 new file mode 100644 index 00000000..83f0d551 --- /dev/null +++ b/src/cloudsc_gpu/cloudsc_gpu_scc_k_caching_mod.F90 @@ -0,0 +1,2641 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE CLOUDSC_GPU_SCC_K_CACHING_MOD + +CONTAINS + SUBROUTINE CLOUDSC_SCC_K_CACHING (KIDIA, KFDIA, KLON, KLEV, PTSPHY, PT, PQ, TENDENCY_TMP_T, TENDENCY_TMP_Q, TENDENCY_TMP_A, & + & TENDENCY_TMP_CLD, TENDENCY_LOC_T, TENDENCY_LOC_Q, TENDENCY_LOC_A, TENDENCY_LOC_CLD, PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, & + & PHRSW, PHRLW, PVERVEL, PAP, PAPH, PLSM, LDCUM, KTYPE, PLU, PLUDE, PSNDE, PMFU, PMFD, PA, PCLV, PSUPSAT, PLCRIT_AER, & + & PICRIT_AER, PRE_ICE, PCCN, PNICE, PCOVPTOT, PRAINFRAC_TOPRFZ, PFSQLF, PFSQIF, PFCQNNG, PFCQLNG, PFSQRF, PFSQSF, PFCQRNG, & + & PFCQSNG, PFSQLTUR, PFSQITUR, PFPLSL, PFPLSN, PFHPSL, PFHPSN, YRECLDP, JL) + !---input + !---prognostic fields + !-- arrays for aerosol-cloud interactions + !!! & PQAER, KAER, & + !---diagnostic output + !---resulting fluxes + + !=============================================================================== + !**** *CLOUDSC* - ROUTINE FOR PARAMATERIZATION OF CLOUD PROCESSES + ! FOR PROGNOSTIC CLOUD SCHEME + !! + ! M.Tiedtke, C.Jakob, A.Tompkins, R.Forbes (E.C.M.W.F.) + !! + ! PURPOSE + ! ------- + ! THIS ROUTINE UPDATES THE CONV/STRAT CLOUD FIELDS. + ! THE FOLLOWING PROCESSES ARE CONSIDERED: + ! - Detrainment of cloud water from convective updrafts + ! - Evaporation/condensation of cloud water in connection + ! with heating/cooling such as by subsidence/ascent + ! - Erosion of clouds by turbulent mixing of cloud air + ! with unsaturated environmental air + ! - Deposition onto ice when liquid water present (Bergeron-Findeison) + ! - Conversion of cloud water into rain (collision-coalescence) + ! - Conversion of cloud ice to snow (aggregation) + ! - Sedimentation of rain, snow and ice + ! - Evaporation of rain and snow + ! - Melting of snow and ice + ! - Freezing of liquid and rain + ! Note: Turbulent transports of s,q,u,v at cloud tops due to + ! buoyancy fluxes and lw radiative cooling are treated in + ! the VDF scheme + !! + ! INTERFACE. + ! ---------- + ! *CLOUDSC* IS CALLED FROM *CALLPAR* + ! THE ROUTINE TAKES ITS INPUT FROM THE LONG-TERM STORAGE: + ! T,Q,L,PHI AND DETRAINMENT OF CLOUD WATER FROM THE + ! CONVECTIVE CLOUDS (MASSFLUX CONVECTION SCHEME), BOUNDARY + ! LAYER TURBULENT FLUXES OF HEAT AND MOISTURE, RADIATIVE FLUXES, + ! OMEGA. + ! IT RETURNS ITS OUTPUT TO: + ! 1.MODIFIED TENDENCIES OF MODEL VARIABLES T AND Q + ! AS WELL AS CLOUD VARIABLES L AND C + ! 2.GENERATES PRECIPITATION FLUXES FROM STRATIFORM CLOUDS + !! + ! EXTERNALS. + ! ---------- + ! NONE + !! + ! MODIFICATIONS. + ! ------------- + ! M. TIEDTKE E.C.M.W.F. 8/1988, 2/1990 + ! CH. JAKOB E.C.M.W.F. 2/1994 IMPLEMENTATION INTO IFS + ! A.TOMPKINS E.C.M.W.F. 2002 NEW NUMERICS + ! 01-05-22 : D.Salmond Safety modifications + ! 02-05-29 : D.Salmond Optimisation + ! 03-01-13 : J.Hague MASS Vector Functions J.Hague + ! 03-10-01 : M.Hamrud Cleaning + ! 04-12-14 : A.Tompkins New implicit solver and physics changes + ! 04-12-03 : A.Tompkins & M.Ko"hler moist PBL + ! G.Mozdzynski 09-Jan-2006 EXP security fix + ! 19-01-09 : P.Bechtold Changed increased RCLDIFF value for KTYPE=2 + ! 07-07-10 : A.Tompkins/R.Forbes 4-Phase flexible microphysics + ! 01-03-11 : R.Forbes Mixed phase changes and tidy up + ! 01-10-11 : R.Forbes Melt ice to rain, allow rain to freeze + ! 01-10-11 : R.Forbes Limit supersat to avoid excessive values + ! 31-10-11 : M.Ahlgrimm Add rain, snow and PEXTRA to DDH output + ! 17-02-12 : F.Vana Simplified/optimized LU factorization + ! 18-05-12 : F.Vana Cleaning + better support of sequential physics + ! N.Semane+P.Bechtold 04-10-2012 Add RVRFACTOR factor for small planet + ! 01-02-13 : R.Forbes New params of autoconv/acc,rain evap,snow riming + ! 15-03-13 : F. Vana New dataflow + more tendencies from the first call + ! K. Yessad (July 2014): Move some variables. + ! F. Vana 05-Mar-2015 Support for single precision + ! 15-01-15 : R.Forbes Added new options for snow evap & ice deposition + ! 10-01-15 : R.Forbes New physics for rain freezing + ! 23-10-14 : P. Bechtold remove zeroing of convection arrays + ! + ! SWITCHES. + ! -------- + !! + ! MODEL PARAMETERS + ! ---------------- + ! RCLDIFF: PARAMETER FOR EROSION OF CLOUDS + ! RCLCRIT_SEA: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER SEA + ! RCLCRIT_LAND: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER LAND + ! RLCRITSNOW: THRESHOLD VALUE FOR SNOW AUTOCONVERSION + ! RKCONV: PARAMETER FOR AUTOCONVERSION OF CLOUDS (KESSLER) + ! RCLDMAX: MAXIMUM POSSIBLE CLW CONTENT (MASON,1971) + !! + ! REFERENCES. + ! ---------- + ! TIEDTKE MWR 1993 + ! JAKOB PhD 2000 + ! GREGORY ET AL. QJRMS 2000 + ! TOMPKINS ET AL. QJRMS 2007 + !! + !=============================================================================== + + USE PARKIND1, ONLY: JPIM, JPRB + USE YOMPHYDER, ONLY: state_type + USE YOMCST, ONLY: RG, RD, RCPD, RETV, RLVTT, RLSTT, RLMLT, RTT, RV + USE YOETHF, ONLY: R2ES, R3LES, R3IES, R4LES, R4IES, R5LES, R5IES, R5ALVCP, R5ALSCP, RALVDCP, RALSDCP, RALFDCP, RTWAT, RTICE, & + & RTICECU, RTWAT_RTICE_R, RTWAT_RTICECU_R, RKOOP1, RKOOP2 + USE YOECLDP, ONLY: TECLDP, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, NCLV + + + + + + IMPLICIT NONE + + !------------------------------------------------------------------------------- + ! Declare input/output arguments + !------------------------------------------------------------------------------- + + ! PLCRIT_AER : critical liquid mmr for rain autoconversion process + ! PICRIT_AER : critical liquid mmr for snow autoconversion process + ! PRE_LIQ : liq Re + ! PRE_ICE : ice Re + ! PCCN : liquid cloud condensation nuclei + ! PNICE : ice number concentration (cf. CCN) + + REAL(KIND=JPRB), INTENT(IN) :: PLCRIT_AER(KLON, KLEV) + REAL(KIND=JPRB), INTENT(IN) :: PICRIT_AER(KLON, KLEV) + REAL(KIND=JPRB), INTENT(IN) :: PRE_ICE(KLON, KLEV) + REAL(KIND=JPRB), INTENT(IN) :: PCCN(KLON, KLEV) ! liquid cloud condensation nuclei + REAL(KIND=JPRB), INTENT(IN) :: PNICE(KLON, KLEV) + ! ice number concentration (cf. CCN) + + INTEGER(KIND=JPIM), INTENT(IN) :: KLON ! Number of grid points + INTEGER(KIND=JPIM), INTENT(IN) :: KLEV ! Number of levels + INTEGER(KIND=JPIM), INTENT(IN) :: KIDIA + INTEGER(KIND=JPIM), INTENT(IN) :: KFDIA + REAL(KIND=JPRB), INTENT(IN) :: PTSPHY ! Physics timestep + REAL(KIND=JPRB), INTENT(IN) :: PT(KLON, KLEV) ! T at start of callpar + REAL(KIND=JPRB), INTENT(IN) :: PQ(KLON, KLEV) ! Q at start of callpar + REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_T(KLON, KLEV) + REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_Q(KLON, KLEV) + REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_A(KLON, KLEV) + REAL(KIND=JPRB), INTENT(IN) :: TENDENCY_TMP_CLD(KLON, KLEV, NCLV) + REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_T(KLON, KLEV) + REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_Q(KLON, KLEV) + REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_A(KLON, KLEV) + REAL(KIND=JPRB), INTENT(INOUT) :: TENDENCY_LOC_CLD(KLON, KLEV, NCLV) + REAL(KIND=JPRB), INTENT(IN) :: PVFA(KLON, KLEV) ! CC from VDF scheme + REAL(KIND=JPRB), INTENT(IN) :: PVFL(KLON, KLEV) ! Liq from VDF scheme + REAL(KIND=JPRB), INTENT(IN) :: PVFI(KLON, KLEV) ! Ice from VDF scheme + REAL(KIND=JPRB), INTENT(IN) :: PDYNA(KLON, KLEV) ! CC from Dynamics + REAL(KIND=JPRB), INTENT(IN) :: PDYNL(KLON, KLEV) ! Liq from Dynamics + REAL(KIND=JPRB), INTENT(IN) :: PDYNI(KLON, KLEV) ! Liq from Dynamics + REAL(KIND=JPRB), INTENT(IN) :: PHRSW(KLON, KLEV) ! Short-wave heating rate + REAL(KIND=JPRB), INTENT(IN) :: PHRLW(KLON, KLEV) ! Long-wave heating rate + REAL(KIND=JPRB), INTENT(IN) :: PVERVEL(KLON, KLEV) !Vertical velocity + REAL(KIND=JPRB), INTENT(IN) :: PAP(KLON, KLEV) ! Pressure on full levels + REAL(KIND=JPRB), INTENT(IN) :: PAPH(KLON, KLEV + 1) ! Pressure on half levels + REAL(KIND=JPRB), INTENT(IN) :: PLSM(KLON) ! Land fraction (0-1) + LOGICAL, INTENT(IN) :: LDCUM(KLON) ! Convection active + INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(KLON) ! Convection type 0,1,2 + REAL(KIND=JPRB), INTENT(IN) :: PLU(KLON, KLEV) ! Conv. condensate + REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(KLON, KLEV) ! Conv. detrained water + REAL(KIND=JPRB), INTENT(IN) :: PSNDE(KLON, KLEV) ! Conv. detrained snow + REAL(KIND=JPRB), INTENT(IN) :: PMFU(KLON, KLEV) ! Conv. mass flux up + REAL(KIND=JPRB), INTENT(IN) :: PMFD(KLON, KLEV) ! Conv. mass flux down + REAL(KIND=JPRB), INTENT(IN) :: PA(KLON, KLEV) + ! Original Cloud fraction (t) + + REAL(KIND=JPRB), INTENT(IN) :: PCLV(KLON, KLEV, NCLV) + + ! Supersat clipped at previous time level in SLTEND + REAL(KIND=JPRB), INTENT(IN) :: PSUPSAT(KLON, KLEV) + REAL(KIND=JPRB), INTENT(OUT) :: PCOVPTOT(KLON, KLEV) ! Precip fraction + REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(KLON) + ! Flux diagnostics for DDH budget + REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(KLON, KLEV + 1) ! Flux of liquid + REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(KLON, KLEV + 1) ! Flux of ice + REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(KLON, KLEV + 1) ! -ve corr for liq + REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(KLON, KLEV + 1) ! -ve corr for ice + REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(KLON, KLEV + 1) ! Flux diagnostics + REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(KLON, KLEV + 1) ! for DDH, generic + REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(KLON, KLEV + 1) ! rain + REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(KLON, KLEV + 1) ! snow + REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(KLON, KLEV + 1) ! liquid flux due to VDF + REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(KLON, KLEV + 1) ! ice flux due to VDF + REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(KLON, KLEV + 1) ! liq+rain sedim flux + REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(KLON, KLEV + 1) ! ice+snow sedim flux + REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(KLON, KLEV + 1) ! Enthalpy flux for liq + REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(KLON, KLEV + 1) + ! Enthalp flux for ice + + TYPE(tecldp), INTENT(INOUT) :: YRECLDP + + !------------------------------------------------------------------------------- + ! Declare local variables + !------------------------------------------------------------------------------- + + REAL(KIND=JPRB) :: ZLCOND1, ZLCOND2, ZLEVAP, ZLEROS, ZLEVAPL, ZLEVAPI, ZRAINAUT, ZSNOWAUT, ZLIQCLD, ZICECLD + ! condensation and evaporation terms + ! autoconversion terms + REAL(KIND=JPRB) :: ZFOKOOP + REAL(KIND=JPRB) :: ZFOEALFA + REAL(KIND=JPRB) :: ZICENUCLEI + ! number concentration of ice nuclei + + REAL(KIND=JPRB) :: ZLICLD + REAL(KIND=JPRB) :: ZACOND + REAL(KIND=JPRB) :: ZAEROS + REAL(KIND=JPRB) :: ZLFINALSUM + REAL(KIND=JPRB) :: ZDQS + REAL(KIND=JPRB) :: ZTOLD + REAL(KIND=JPRB) :: ZQOLD + REAL(KIND=JPRB) :: ZDTGDP + REAL(KIND=JPRB) :: ZRDTGDP + REAL(KIND=JPRB) :: ZTRPAUS + REAL(KIND=JPRB) :: ZCOVPCLR + REAL(KIND=JPRB) :: ZPRECLR + REAL(KIND=JPRB) :: ZCOVPTOT + REAL(KIND=JPRB) :: ZCOVPMAX + REAL(KIND=JPRB) :: ZQPRETOT + REAL(KIND=JPRB) :: ZDPEVAP + REAL(KIND=JPRB) :: ZDTFORC + REAL(KIND=JPRB) :: ZDTDIAB + ! REAL(KIND=JPRB), INTENT(INOUT) :: ZTP1(KLON, KLEV) + REAL(KIND=JPRB) :: ZTP1(2) + REAL(KIND=JPRB) :: ZLDEFR + REAL(KIND=JPRB) :: ZLDIFDT + REAL(KIND=JPRB) :: ZDTGDPF + REAL(KIND=JPRB) :: ZLCUST(NCLV) + REAL(KIND=JPRB) :: ZACUST + REAL(KIND=JPRB) :: ZMF + + REAL(KIND=JPRB) :: ZRHO + REAL(KIND=JPRB) :: ZTMP1, ZTMP2, ZTMP3 + REAL(KIND=JPRB) :: ZTMP4, ZTMP5, ZTMP6, ZTMP7 + REAL(KIND=JPRB) :: ZALFAWM + + ! Accumulators of A,B,and C factors for cloud equations + REAL(KIND=JPRB) :: ZSOLAB ! -ve implicit CC + REAL(KIND=JPRB) :: ZSOLAC ! linear CC + REAL(KIND=JPRB) :: ZANEW + REAL(KIND=JPRB) :: ZANEWM1 + + REAL(KIND=JPRB) :: ZGDP + + !---for flux calculation + REAL(KIND=JPRB) :: ZDA + REAL(KIND=JPRB) :: ZLI + REAL(KIND=JPRB) :: ZA(2) + REAL(KIND=JPRB) :: ZAORIG + ! start of scheme value for CC + + LOGICAL :: LLFLAG + LOGICAL :: LLO1 + + INTEGER(KIND=JPIM) :: ICALL, IK, JK, JL, JM, JN, JO, JLEN, IS + + REAL(KIND=JPRB) :: ZDP, ZPAPHD + + REAL(KIND=JPRB) :: ZALFA + ! & ZALFACU, ZALFALS + REAL(KIND=JPRB) :: ZALFAW + REAL(KIND=JPRB) :: ZBETA, ZBETA1 + !REAL(KIND=JPRB) :: ZBOTT + REAL(KIND=JPRB) :: ZCFPR + REAL(KIND=JPRB) :: ZCOR + REAL(KIND=JPRB) :: ZCDMAX + REAL(KIND=JPRB) :: ZMIN + REAL(KIND=JPRB) :: ZLCONDLIM + REAL(KIND=JPRB) :: ZDENOM + REAL(KIND=JPRB) :: ZDPMXDT + REAL(KIND=JPRB) :: ZDPR + REAL(KIND=JPRB) :: ZDTDP + REAL(KIND=JPRB) :: ZE + REAL(KIND=JPRB) :: ZEPSEC + REAL(KIND=JPRB) :: ZFAC, ZFACI, ZFACW + REAL(KIND=JPRB) :: ZGDCP + REAL(KIND=JPRB) :: ZINEW + REAL(KIND=JPRB) :: ZLCRIT + REAL(KIND=JPRB) :: ZMFDN + REAL(KIND=JPRB) :: ZPRECIP + REAL(KIND=JPRB) :: ZQE + REAL(KIND=JPRB) :: ZQSAT, ZQTMST, ZRDCP + REAL(KIND=JPRB) :: ZRHC, ZSIG, ZSIGK + REAL(KIND=JPRB) :: ZWTOT + REAL(KIND=JPRB) :: ZZCO, ZZDL, ZZRH, ZZZDT, ZQADJ + REAL(KIND=JPRB) :: ZQNEW, ZTNEW + REAL(KIND=JPRB) :: ZRG_R, ZGDPH_R, ZCONS1, ZCOND, ZCONS1A + REAL(KIND=JPRB) :: ZLFINAL + REAL(KIND=JPRB) :: ZMELT + REAL(KIND=JPRB) :: ZEVAP + REAL(KIND=JPRB) :: ZFRZ + REAL(KIND=JPRB) :: ZVPLIQ, ZVPICE + REAL(KIND=JPRB) :: ZADD, ZBDD, ZCVDS, ZICE0, ZDEPOS + REAL(KIND=JPRB) :: ZSUPSAT + REAL(KIND=JPRB) :: ZFALL + REAL(KIND=JPRB) :: ZRE_ICE + REAL(KIND=JPRB) :: ZRLDCP + REAL(KIND=JPRB) :: ZQP1ENV + + !---------------------------- + ! Arrays for new microphysics + !---------------------------- + INTEGER(KIND=JPIM) :: IPHASE(NCLV) + ! marker for water phase of each species + ! 0=vapour, 1=liquid, 2=ice + + INTEGER(KIND=JPIM) :: IMELT(NCLV) + ! marks melting linkage for ice categories + ! ice->liquid, snow->rain + + LOGICAL :: LLFALL(NCLV) + ! marks falling species + ! LLFALL=0, cloud cover must > 0 for zqx > 0 + ! LLFALL=1, no cloud needed, zqx can evaporate + + LOGICAL :: LLINDEX1(NCLV) ! index variable + LOGICAL :: LLINDEX3(NCLV, NCLV) ! index variable + REAL(KIND=JPRB) :: ZMAX + REAL(KIND=JPRB) :: ZRAT + INTEGER(KIND=JPIM) :: IORDER(NCLV) + ! array for sorting explicit terms + + REAL(KIND=JPRB) :: ZLIQFRAC ! cloud liquid water fraction: ql/(ql+qi) + REAL(KIND=JPRB) :: ZICEFRAC ! cloud ice water fraction: qi/(ql+qi) + REAL(KIND=JPRB) :: ZQX(NCLV) ! water variables + REAL(KIND=JPRB) :: ZQX0(NCLV) ! water variables at start of scheme + REAL(KIND=JPRB) :: ZQXN(NCLV) ! new values for zqx at time+1 + REAL(KIND=JPRB) :: ZQXFG(NCLV) ! first guess values including precip + REAL(KIND=JPRB) :: ZQXNM1(NCLV) ! new values for zqx at time+1 at level above + REAL(KIND=JPRB) :: ZFLUXQ(NCLV) + ! fluxes convergence of species (needed?) + ! Keep the following for possible future total water variance scheme? + !REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + !REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + !REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + !REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + !REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + REAL(KIND=JPRB) :: ZPFPLSX(2, NCLV) ! generalized precipitation flux + REAL(KIND=JPRB) :: ZLNEG(NCLV) ! for negative correction diagnostics + REAL(KIND=JPRB) :: ZMELTMAX + REAL(KIND=JPRB) :: ZFRZMAX + REAL(KIND=JPRB) :: ZICETOT + + REAL(KIND=JPRB) :: ZQXN2D(NCLV) + ! water variables store + + REAL(KIND=JPRB) :: ZQSMIX + ! diagnostic mixed phase saturation + !REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + REAL(KIND=JPRB) :: ZQSLIQ ! liquid water saturation + REAL(KIND=JPRB) :: ZQSICE + ! ice water saturation + + !REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + !REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + !REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + REAL(KIND=JPRB) :: ZFOEEWMT + REAL(KIND=JPRB) :: ZFOEEW + REAL(KIND=JPRB) :: ZFOEELIQT + !REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + REAL(KIND=JPRB) :: ZDQSLIQDT, ZDQSICEDT, ZDQSMIXDT + REAL(KIND=JPRB) :: ZCORQSLIQ + REAL(KIND=JPRB) :: ZCORQSICE + !REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + REAL(KIND=JPRB) :: ZCORQSMIX + REAL(KIND=JPRB) :: ZEVAPLIMLIQ, ZEVAPLIMICE, ZEVAPLIMMIX + + !------------------------------------------------------- + ! SOURCE/SINK array for implicit and explicit terms + !------------------------------------------------------- + ! a POSITIVE value entered into the arrays is a... + ! Source of this variable + ! | + ! | Sink of this variable + ! | | + ! V V + ! ZSOLQA(JL,IQa,IQb) = explicit terms + ! ZSOLQB(JL,IQa,IQb) = implicit terms + ! Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + ! a source of NCLDQL and a sink of IQV + ! put 'magic' source terms such as PLUDE from + ! detrainment into explicit source/sink array diagnognal + ! ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + ! i.e. A positive value is a sink!????? weird... + !------------------------------------------------------- + + REAL(KIND=JPRB) :: ZSOLQA(NCLV, NCLV) ! explicit sources and sinks + REAL(KIND=JPRB) :: ZSOLQB(NCLV, NCLV) + ! implicit sources and sinks + ! e.g. microphysical pathways between ice variables. + REAL(KIND=JPRB) :: ZQLHS(NCLV, NCLV) ! n x n matrix storing the LHS of implicit solver + REAL(KIND=JPRB) :: ZVQX(NCLV) ! fall speeds of three categories + REAL(KIND=JPRB) :: ZEXPLICIT, ZRATIO(NCLV), ZSINKSUM(NCLV) + + ! for sedimentation source/sink terms + REAL(KIND=JPRB) :: ZFALLSINK(NCLV) + REAL(KIND=JPRB) :: ZFALLSRCE(NCLV) + + ! for convection detrainment source and subsidence source/sink terms + REAL(KIND=JPRB) :: ZCONVSRCE(NCLV) + REAL(KIND=JPRB) :: ZCONVSINK(NCLV) + + ! for supersaturation source term from previous timestep + REAL(KIND=JPRB) :: ZPSUPSATSRCE(NCLV) + + ! Numerical fit to wet bulb temperature + REAL(KIND=JPRB), PARAMETER :: ZTW1 = 1329.31_JPRB + REAL(KIND=JPRB), PARAMETER :: ZTW2 = 0.0074615_JPRB + REAL(KIND=JPRB), PARAMETER :: ZTW3 = 0.85E5_JPRB + REAL(KIND=JPRB), PARAMETER :: ZTW4 = 40.637_JPRB + REAL(KIND=JPRB), PARAMETER :: ZTW5 = 275.0_JPRB + + REAL(KIND=JPRB) :: ZSUBSAT ! Subsaturation for snow melting term + REAL(KIND=JPRB) :: ZTDMTW0 + ! Diff between dry-bulb temperature and + ! temperature when wet-bulb = 0degC + + ! Variables for deposition term + REAL(KIND=JPRB) :: ZTCG ! Temperature dependent function for ice PSD + REAL(KIND=JPRB) :: ZFACX1I, ZFACX1S ! PSD correction factor + REAL(KIND=JPRB) :: ZAPLUSB, ZCORRFAC, ZCORRFAC2, ZPR02, ZTERM1, ZTERM2 ! for ice dep + REAL(KIND=JPRB) :: ZCLDTOPDIST ! Distance from cloud top + REAL(KIND=JPRB) :: ZINFACTOR + ! No. of ice nuclei factor for deposition + + ! Autoconversion/accretion/riming/evaporation + INTEGER(KIND=JPIM) :: IWARMRAIN + INTEGER(KIND=JPIM) :: IEVAPRAIN + INTEGER(KIND=JPIM) :: IEVAPSNOW + INTEGER(KIND=JPIM) :: IDEPICE + REAL(KIND=JPRB) :: ZRAINACC + REAL(KIND=JPRB) :: ZRAINCLD + REAL(KIND=JPRB) :: ZSNOWRIME + REAL(KIND=JPRB) :: ZSNOWCLD + REAL(KIND=JPRB) :: ZESATLIQ + REAL(KIND=JPRB) :: ZFALLCORR + REAL(KIND=JPRB) :: ZLAMBDA + REAL(KIND=JPRB) :: ZEVAP_DENOM + REAL(KIND=JPRB) :: ZCORR2 + REAL(KIND=JPRB) :: ZKA + REAL(KIND=JPRB) :: ZCONST + REAL(KIND=JPRB) :: ZTEMP + + ! Rain freezing + LOGICAL :: LLRAINLIQ + ! True if majority of raindrops are liquid (no ice core) + + !---------------------------- + ! End: new microphysics + !---------------------------- + + !---------------------- + ! SCM budget statistics + !---------------------- + REAL(KIND=JPRB) :: ZRAIN + + REAL(KIND=JPRB) :: ZHOOK_HANDLE + REAL(KIND=JPRB) :: ZTMPL, ZTMPI, ZTMPA + + REAL(KIND=JPRB) :: ZMM, ZRR + REAL(KIND=JPRB) :: ZRG + + REAL(KIND=JPRB) :: ZZSUM, ZZRATIO + REAL(KIND=JPRB) :: ZEPSILON + + REAL(KIND=JPRB) :: ZCOND1, ZQP + + REAL(KIND=JPRB) :: PSUM_SOLQA + + INTEGER(KIND=JPIM) :: JK_I, JK_IP1, JK_IM1 + + +#include "fcttre.func.h" +#include "fccld.func.h" +!$acc routine seq + + + !=============================================================================== + !IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) + + !=============================================================================== + ! 0.0 Beginning of timestep book-keeping + !---------------------------------------------------------------------- + + + !###################################################################### + ! 0. *** SET UP CONSTANTS *** + !###################################################################### + + ZEPSILON = 100._JPRB*EPSILON(ZEPSILON) + + ! --------------------------------------------------------------------- + ! Set version of warm-rain autoconversion/accretion + ! IWARMRAIN = 1 ! Sundquist + ! IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + ! --------------------------------------------------------------------- + IWARMRAIN = 2 + ! --------------------------------------------------------------------- + ! Set version of rain evaporation + ! IEVAPRAIN = 1 ! Sundquist + ! IEVAPRAIN = 2 ! Abel and Boutle (2013) + ! --------------------------------------------------------------------- + IEVAPRAIN = 2 + ! --------------------------------------------------------------------- + ! Set version of snow evaporation + ! IEVAPSNOW = 1 ! Sundquist + ! IEVAPSNOW = 2 ! New + ! --------------------------------------------------------------------- + IEVAPSNOW = 1 + ! --------------------------------------------------------------------- + ! Set version of ice deposition + ! IDEPICE = 1 ! Rotstayn (2001) + ! IDEPICE = 2 ! New + ! --------------------------------------------------------------------- + IDEPICE = 1 + + ! --------------------- + ! Some simple constants + ! --------------------- + ZQTMST = 1.0_JPRB / PTSPHY + ZGDCP = RG / RCPD + ZRDCP = RD / RCPD + ZCONS1A = RCPD / ((RLMLT*RG*YRECLDP%RTAUMEL)) + ZEPSEC = 1.E-14_JPRB + ZRG_R = 1.0_JPRB / RG + ZRLDCP = 1.0_JPRB / (RALSDCP - RALVDCP) + + ! Note: Defined in module/yoecldp.F90 + ! NCLDQL=1 ! liquid cloud water + ! NCLDQI=2 ! ice cloud water + ! NCLDQR=3 ! rain water + ! NCLDQS=4 ! snow + ! NCLDQV=5 ! vapour + + ! ----------------------------------------------- + ! Define species phase, 0=vapour, 1=liquid, 2=ice + ! ----------------------------------------------- + IPHASE(NCLDQV) = 0 + IPHASE(NCLDQL) = 1 + IPHASE(NCLDQR) = 1 + IPHASE(NCLDQI) = 2 + IPHASE(NCLDQS) = 2 + + ! --------------------------------------------------- + ! Set up melting/freezing index, + ! if an ice category melts/freezes, where does it go? + ! --------------------------------------------------- + IMELT(NCLDQV) = -99 + IMELT(NCLDQL) = NCLDQI + IMELT(NCLDQR) = NCLDQS + IMELT(NCLDQI) = NCLDQR + IMELT(NCLDQS) = NCLDQR + + ! ----------------------------------------------- + ! INITIALIZATION OF OUTPUT TENDENCIES + ! ----------------------------------------------- +!$acc loop seq + DO JK=1,KLEV + TENDENCY_LOC_T(JL, JK) = 0.0_JPRB + TENDENCY_LOC_Q(JL, JK) = 0.0_JPRB + TENDENCY_LOC_A(JL, JK) = 0.0_JPRB + END DO +!$acc loop seq + DO JM=1,NCLV - 1 + DO JK=1,KLEV + TENDENCY_LOC_CLD(JL, JK, JM) = 0.0_JPRB + END DO + END DO + + !-- These were uninitialized : meaningful only when we compare error differences +!$acc loop seq + DO JK=1,KLEV + PCOVPTOT(JL, JK) = 0.0_JPRB + TENDENCY_LOC_CLD(JL, JK, NCLV) = 0.0_JPRB + END DO + + !-------- + ! Fluxes: + !-------- + PFSQLF(JL, 1) = 0.0_JPRB + PFSQIF(JL, 1) = 0.0_JPRB + PFSQRF(JL, 1) = 0.0_JPRB + PFSQSF(JL, 1) = 0.0_JPRB + PFCQLNG(JL, 1) = 0.0_JPRB + PFCQNNG(JL, 1) = 0.0_JPRB + PFCQRNG(JL, 1) = 0.0_JPRB !rain + PFCQSNG(JL, 1) = 0.0_JPRB !snow + ! fluxes due to turbulence + PFSQLTUR(JL, 1) = 0.0_JPRB + PFSQITUR(JL, 1) = 0.0_JPRB + + ! ------------------------- + ! set up fall speeds in m/s + ! ------------------------- + ZVQX(NCLDQV) = 0.0_JPRB + ZVQX(NCLDQL) = 0.0_JPRB + ZVQX(NCLDQI) = YRECLDP%RVICE + ZVQX(NCLDQR) = YRECLDP%RVRAIN + ZVQX(NCLDQS) = YRECLDP%RVSNOW + LLFALL(:) = .false. +!$acc loop seq + DO JM=1,NCLV + IF (ZVQX(JM) > 0.0_JPRB) LLFALL(JM) = .true. + ! falling species + END DO + ! Set LLFALL to false for ice (but ice still sediments!) + ! Need to rationalise this at some point + LLFALL(NCLDQI) = .false. + + PRAINFRAC_TOPRFZ(JL) = 0.0_JPRB ! rain fraction at top of refreezing layer + LLRAINLIQ = .true. ! Assume all raindrops are liquid initially + + !###################################################################### + ! 1. *** INITIAL VALUES FOR VARIABLES *** + !###################################################################### + + !----------------------------- + ! Reset single level variables + !----------------------------- + + ZANEWM1 = 0.0_JPRB + ZDA = 0.0_JPRB + ZCOVPCLR = 0.0_JPRB + ZCOVPMAX = 0.0_JPRB + ZCOVPTOT = 0.0_JPRB + ZCLDTOPDIST = 0.0_JPRB + + !------------- + ! zero arrays + !------------- +!$acc loop seq + DO JM=1,NCLV + ! DO JK=1,KLEV + 1 + ZPFPLSX(1, JM) = 0.0_JPRB ! precip fluxes + ZPFPLSX(2, JM) = 0.0_JPRB + ! END DO + END DO + + + ! ---------------------- + ! non CLV initialization + ! ---------------------- +!$acc loop seq + DO JK=1,KLEV + 1 + + ! Fortran counting is beautiful! + JK_I = MOD(JK+1, 2) + 1 + JK_IP1 = MOD(JK+2, 2) + 1 + JK_IM1 = MOD(JK, 2) + 1 + + IF (1<=JK .AND. JK<=KLEV) THEN + ZTP1(JK_I) = PT(JL, JK) + PTSPHY*TENDENCY_TMP_T(JL, JK) + ZQX(NCLDQV) = PQ(JL, JK) + PTSPHY*TENDENCY_TMP_Q(JL, JK) + ZQX0(NCLDQV) = PQ(JL, JK) + PTSPHY*TENDENCY_TMP_Q(JL, JK) + ZA(JK_I) = PA(JL, JK) + PTSPHY*TENDENCY_TMP_A(JL, JK) + ZAORIG = PA(JL, JK) + PTSPHY*TENDENCY_TMP_A(JL, JK) + ! END DO + + ! ------------------------------------- + ! initialization for CLV family + ! ------------------------------------- + DO JM=1,NCLV - 1 + ZQX(JM) = PCLV(JL, JK, JM) + PTSPHY*TENDENCY_TMP_CLD(JL, JK, JM) + ZQX0(JM) = PCLV(JL, JK, JM) + PTSPHY*TENDENCY_TMP_CLD(JL, JK, JM) + END DO + + DO JM=1,NCLV + ZQXN2D(JM) = 0.0_JPRB ! end of timestep values in 2D + ZLNEG(JM) = 0.0_JPRB ! negative input check + END DO + + ! ---------------------------------------------------- + ! Tidy up very small cloud cover or total cloud water + ! ---------------------------------------------------- + IF (ZQX(NCLDQL) + ZQX(NCLDQI) < YRECLDP%RLMIN .or. ZA(JK_I) < YRECLDP%RAMIN) THEN + + ! Evaporate small cloud liquid water amounts + ZLNEG(NCLDQL) = ZLNEG(NCLDQL) + ZQX(NCLDQL) + ZQADJ = ZQX(NCLDQL)*ZQTMST + TENDENCY_LOC_Q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + ZQADJ + TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALVDCP*ZQADJ + ZQX(NCLDQV) = ZQX(NCLDQV) + ZQX(NCLDQL) + ZQX(NCLDQL) = 0.0_JPRB + + ! Evaporate small cloud ice water amounts + ZLNEG(NCLDQI) = ZLNEG(NCLDQI) + ZQX(NCLDQI) + ZQADJ = ZQX(NCLDQI)*ZQTMST + TENDENCY_LOC_Q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + ZQADJ + TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALSDCP*ZQADJ + ZQX(NCLDQV) = ZQX(NCLDQV) + ZQX(NCLDQI) + ZQX(NCLDQI) = 0.0_JPRB + + ! Set cloud cover to zero + ZA(JK_I) = 0.0_JPRB + + END IF + + ! --------------------------------- + ! Tidy up small CLV variables + ! --------------------------------- + + !DIR$ IVDEP + DO JM=1,NCLV - 1 + !DIR$ IVDEP + IF (ZQX(JM) < YRECLDP%RLMIN) THEN + ZLNEG(JM) = ZLNEG(JM) + ZQX(JM) + ZQADJ = ZQX(JM)*ZQTMST + TENDENCY_LOC_Q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + ZQADJ + IF (IPHASE(JM) == 1) TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALVDCP*ZQADJ + IF (IPHASE(JM) == 2) TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) - RALSDCP*ZQADJ + ZQX(NCLDQV) = ZQX(NCLDQV) + ZQX(JM) + ZQX(JM) = 0.0_JPRB + END IF + END DO + + ! ------------------------------ + ! Define saturation values + ! ------------------------------ + + !---------------------------------------- + ! old *diagnostic* mixed phase saturation + !---------------------------------------- + ZFOEALFA = FOEALFA(ZTP1(JK_I)) + ZFOEEWMT = MIN(FOEEWM(ZTP1(JK_I)) / PAP(JL, JK), 0.5_JPRB) + ZQSMIX = ZFOEEWMT + ZQSMIX = ZQSMIX / (1.0_JPRB - RETV*ZQSMIX) + + !--------------------------------------------- + ! ice saturation T<273K + ! liquid water saturation for T>273K + !--------------------------------------------- + ZALFA = FOEDELTA(ZTP1(JK_I)) + ZFOEEW = MIN((ZALFA*FOEELIQ(ZTP1(JK_I)) + (1.0_JPRB - ZALFA)*FOEEICE(ZTP1(JK_I))) / PAP(JL, JK), 0.5_JPRB) + ZFOEEW = MIN(0.5_JPRB, ZFOEEW) + ZQSICE = ZFOEEW / (1.0_JPRB - RETV*ZFOEEW) + + !---------------------------------- + ! liquid water saturation + !---------------------------------- + ZFOEELIQT = MIN(FOEELIQ(ZTP1(JK_I)) / PAP(JL, JK), 0.5_JPRB) + ZQSLIQ = ZFOEELIQT + ZQSLIQ = ZQSLIQ / (1.0_JPRB - RETV*ZQSLIQ) + + ! !---------------------------------- + ! ! ice water saturation + ! !---------------------------------- + ! ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + ! ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + ! ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + + + !------------------------------------------ + ! Ensure cloud fraction is between 0 and 1 + !------------------------------------------ + ZA(JK_I) = MAX(0.0_JPRB, MIN(1.0_JPRB, ZA(JK_I))) + + !------------------------------------------------------------------- + ! Calculate liq/ice fractions (no longer a diagnostic relationship) + !------------------------------------------------------------------- + ZLI = ZQX(NCLDQL) + ZQX(NCLDQI) + IF (ZLI > YRECLDP%RLMIN) THEN + ZLIQFRAC = ZQX(NCLDQL) / ZLI + ZICEFRAC = 1.0_JPRB - ZLIQFRAC + ELSE + ZLIQFRAC = 0.0_JPRB + ZICEFRAC = 0.0_JPRB + END IF + + !###################################################################### + ! 2. *** CONSTANTS AND PARAMETERS *** + !###################################################################### + ! Calculate L in updrafts of bl-clouds + ! Specify QS, P/PS for tropopause (for c2) + ! And initialize variables + !------------------------------------------ + +! !--------------------------------- +! ! Find tropopause level (ZTRPAUS) +! !--------------------------------- +! ZTRPAUS = 0.1_JPRB +! ZPAPHD = 1.0_JPRB / PAPH(JL, KLEV + 1) +! !$acc loop seq +! DO JK=1,KLEV - 1 +! ZSIG = PAP(JL, JK)*ZPAPHD +! IF (ZSIG > 0.1_JPRB .and. ZSIG < 0.4_JPRB .and. ZTP1(JK_I) > ZTP1(JK + 1)) THEN +! ZTRPAUS = ZSIG +! END IF +! END DO + + !###################################################################### + ! 3. *** PHYSICS *** + !###################################################################### + + + !---------------------------------------------------------------------- + ! START OF VERTICAL LOOP + !---------------------------------------------------------------------- + + ! No longer the start of the loop, but beginning of the main section + IF (YRECLDP%NCLDTOP<=JK .AND. JK<=KLEV) THEN + + !---------------------------------------------------------------------- + ! 3.0 INITIALIZE VARIABLES + !---------------------------------------------------------------------- + + !--------------------------------- + ! First guess microphysics + !--------------------------------- + DO JM=1,NCLV + ZQXFG(JM) = ZQX(JM) + END DO + + !--------------------------------- + ! Set KLON arrays to zero + !--------------------------------- + + ZLICLD = 0.0_JPRB + ZRAINAUT = 0.0_JPRB ! currently needed for diags + ZRAINACC = 0.0_JPRB ! currently needed for diags + ZSNOWAUT = 0.0_JPRB ! needed + ZLDEFR = 0.0_JPRB + ZACUST = 0.0_JPRB ! set later when needed + ZQPRETOT = 0.0_JPRB + ZLFINALSUM = 0.0_JPRB + + ! Required for first guess call + ZLCOND1 = 0.0_JPRB + ZLCOND2 = 0.0_JPRB + ZSUPSAT = 0.0_JPRB + ZLEVAPL = 0.0_JPRB + ZLEVAPI = 0.0_JPRB + + !------------------------------------- + ! solvers for cloud fraction + !------------------------------------- + ZSOLAB = 0.0_JPRB + ZSOLAC = 0.0_JPRB + + ZICETOT = 0.0_JPRB + + !------------------------------------------ + ! reset matrix so missing pathways are set + !------------------------------------------ + DO JM=1,NCLV + DO JN=1,NCLV + ZSOLQB(JN, JM) = 0.0_JPRB + ZSOLQA(JN, JM) = 0.0_JPRB + END DO + END DO + + !---------------------------------- + ! reset new microphysics variables + !---------------------------------- + DO JM=1,NCLV + ZFALLSRCE(JM) = 0.0_JPRB + ZFALLSINK(JM) = 0.0_JPRB + ZCONVSRCE(JM) = 0.0_JPRB + ZCONVSINK(JM) = 0.0_JPRB + ZPSUPSATSRCE(JM) = 0.0_JPRB + ZRATIO(JM) = 0.0_JPRB + END DO + + + !------------------------- + ! derived variables needed + !------------------------- + + ZDP = PAPH(JL, JK + 1) - PAPH(JL, JK) ! dp + ZGDP = RG / ZDP ! g/dp + ZRHO = PAP(JL, JK) / ((RD*ZTP1(JK_I))) ! p/RT air density + + ZDTGDP = PTSPHY*ZGDP ! dt g/dp + ZRDTGDP = ZDP*(1.0_JPRB / ((PTSPHY*RG))) ! 1/(dt g/dp) + + IF (JK > 1) ZDTGDPF = (PTSPHY*RG) / (PAP(JL, JK) - PAP(JL, JK - 1)) + + !------------------------------------ + ! Calculate dqs/dT correction factor + !------------------------------------ + ! Reminder: RETV=RV/RD-1 + + ! liquid + ZFACW = R5LES / ((ZTP1(JK_I) - R4LES)**2) + ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEELIQT) + ZDQSLIQDT = ZFACW*ZCOR*ZQSLIQ + ZCORQSLIQ = 1.0_JPRB + RALVDCP*ZDQSLIQDT + + ! ice + ZFACI = R5IES / ((ZTP1(JK_I) - R4IES)**2) + ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEEW) + ZDQSICEDT = ZFACI*ZCOR*ZQSICE + ZCORQSICE = 1.0_JPRB + RALSDCP*ZDQSICEDT + + ! diagnostic mixed + ZALFAW = ZFOEALFA + ZALFAWM = ZALFAW + ZFAC = ZALFAW*ZFACW + (1.0_JPRB - ZALFAW)*ZFACI + ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZFOEEWMT) + ZDQSMIXDT = ZFAC*ZCOR*ZQSMIX + ZCORQSMIX = 1.0_JPRB + FOELDCPM(ZTP1(JK_I))*ZDQSMIXDT + + ! evaporation/sublimation limits + ZEVAPLIMMIX = MAX((ZQSMIX - ZQX(NCLDQV)) / ZCORQSMIX, 0.0_JPRB) + ZEVAPLIMLIQ = MAX((ZQSLIQ - ZQX(NCLDQV)) / ZCORQSLIQ, 0.0_JPRB) + ZEVAPLIMICE = MAX((ZQSICE - ZQX(NCLDQV)) / ZCORQSICE, 0.0_JPRB) + + !-------------------------------- + ! in-cloud consensate amount + !-------------------------------- + ZTMPA = 1.0_JPRB / MAX(ZA(JK_I), ZEPSEC) + ZLIQCLD = ZQX(NCLDQL)*ZTMPA + ZICECLD = ZQX(NCLDQI)*ZTMPA + ZLICLD = ZLIQCLD + ZICECLD + + + !------------------------------------------------ + ! Evaporate very small amounts of liquid and ice + !------------------------------------------------ + + IF (ZQX(NCLDQL) < YRECLDP%RLMIN) THEN + ZSOLQA(NCLDQV, NCLDQL) = ZQX(NCLDQL) + ZSOLQA(NCLDQL, NCLDQV) = -ZQX(NCLDQL) + END IF + + IF (ZQX(NCLDQI) < YRECLDP%RLMIN) THEN + ZSOLQA(NCLDQV, NCLDQI) = ZQX(NCLDQI) + ZSOLQA(NCLDQI, NCLDQV) = -ZQX(NCLDQI) + END IF + + + !--------------------------------------------------------------------- + ! 3.1 ICE SUPERSATURATION ADJUSTMENT + !--------------------------------------------------------------------- + ! Note that the supersaturation adjustment is made with respect to + ! liquid saturation: when T>0C + ! ice saturation: when T<0C + ! with an adjustment made to allow for ice + ! supersaturation in the clear sky + ! Note also that the KOOP factor automatically clips the supersaturation + ! to a maximum set by the liquid water saturation mixing ratio + ! important for temperatures near to but below 0C + !----------------------------------------------------------------------- + + !DIR$ NOFUSION + + !----------------------------------- + ! 3.1.1 Supersaturation limit (from Koop) + !----------------------------------- + ! Needs to be set for all temperatures + ZFOKOOP = FOKOOP(ZTP1(JK_I)) + + IF (ZTP1(JK_I) >= RTT .or. YRECLDP%NSSOPT == 0) THEN + ZFAC = 1.0_JPRB + ZFACI = 1.0_JPRB + ELSE + ZFAC = ZA(JK_I) + ZFOKOOP*(1.0_JPRB - ZA(JK_I)) + ZFACI = PTSPHY / YRECLDP%RKOOPTAU + END IF + + !------------------------------------------------------------------- + ! 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + ! correction factor + ! [#Note: QSICE or QSLIQ] + !------------------------------------------------------------------- + + ! Calculate supersaturation to add to cloud + IF (ZA(JK_I) > 1.0_JPRB - YRECLDP%RAMIN) THEN + ZSUPSAT = MAX((ZQX(NCLDQV) - ZFAC*ZQSICE) / ZCORQSICE, 0.0_JPRB) + ELSE + ! Calculate environmental humidity supersaturation + ZQP1ENV = (ZQX(NCLDQV) - ZA(JK_I)*ZQSICE) / MAX(1.0_JPRB - ZA(JK_I), ZEPSILON) + !& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + ZSUPSAT = MAX(((1.0_JPRB - ZA(JK_I))*(ZQP1ENV - ZFAC*ZQSICE)) / ZCORQSICE, 0.0_JPRB) + END IF + + !------------------------------------------------------------------- + ! Here the supersaturation is turned into liquid water + ! However, if the temperature is below the threshold for homogeneous + ! freezing then the supersaturation is turned instantly to ice. + !-------------------------------------------------------------------- + + IF (ZSUPSAT > ZEPSEC) THEN + + IF (ZTP1(JK_I) > YRECLDP%RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZSUPSAT + ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZSUPSAT + ! Include liquid in first guess + ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZSUPSAT + ELSE + ! Turn supersaturation into ice water + ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZSUPSAT + ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZSUPSAT + ! Add ice to first guess for deposition term + ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZSUPSAT + END IF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC = (1.0_JPRB - ZA(JK_I))*ZFACI + + END IF + + !------------------------------------------------------- + ! 3.1.3 Include supersaturation from previous timestep + ! (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + !------------------------------------------------------- + IF (PSUPSAT(JL, JK) > ZEPSEC) THEN + IF (ZTP1(JK_I) > YRECLDP%RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(NCLDQL, NCLDQL) = ZSOLQA(NCLDQL, NCLDQL) + PSUPSAT(JL, JK) + ZPSUPSATSRCE(NCLDQL) = PSUPSAT(JL, JK) + ! Add liquid to first guess for deposition term + ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + PSUPSAT(JL, JK) + ! Store cloud budget diagnostics if required + ELSE + ! Turn supersaturation into ice water + ZSOLQA(NCLDQI, NCLDQI) = ZSOLQA(NCLDQI, NCLDQI) + PSUPSAT(JL, JK) + ZPSUPSATSRCE(NCLDQI) = PSUPSAT(JL, JK) + ! Add ice to first guess for deposition term + ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + PSUPSAT(JL, JK) + ! Store cloud budget diagnostics if required + END IF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC = (1.0_JPRB - ZA(JK_I))*ZFACI + ! Store cloud budget diagnostics if required + END IF + + ! on JL + + !--------------------------------------------------------------------- + ! 3.2 DETRAINMENT FROM CONVECTION + !--------------------------------------------------------------------- + ! * Diagnostic T-ice/liq split retained for convection + ! Note: This link is now flexible and a future convection + ! scheme can detrain explicit seperate budgets of: + ! cloud water, ice, rain and snow + ! * There is no (1-ZA) multiplier term on the cloud detrainment + ! term, since is now written in mass-flux terms + ! [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + !--------------------------------------------------------------------- + IF (JK < KLEV .and. JK >= YRECLDP%NCLDTOP) THEN + + + PLUDE(JL, JK) = PLUDE(JL, JK)*ZDTGDP + + IF (LDCUM(JL) .and. PLUDE(JL, JK) > YRECLDP%RLMIN .and. PLU(JL, JK + 1) > ZEPSEC) THEN + + ZSOLAC = ZSOLAC + PLUDE(JL, JK) / PLU(JL, JK + 1) + ! *diagnostic temperature split* + ZALFAW = ZFOEALFA + ZCONVSRCE(NCLDQL) = ZALFAW*PLUDE(JL, JK) + ZCONVSRCE(NCLDQI) = (1.0_JPRB - ZALFAW)*PLUDE(JL, JK) + ZSOLQA(NCLDQL, NCLDQL) = ZSOLQA(NCLDQL, NCLDQL) + ZCONVSRCE(NCLDQL) + ZSOLQA(NCLDQI, NCLDQI) = ZSOLQA(NCLDQI, NCLDQI) + ZCONVSRCE(NCLDQI) + + ELSE + + PLUDE(JL, JK) = 0.0_JPRB + + END IF + ! *convective snow detrainment source + IF (LDCUM(JL)) ZSOLQA(NCLDQS, NCLDQS) = ZSOLQA(NCLDQS, NCLDQS) + PSNDE(JL, JK)*ZDTGDP + + + END IF + ! JK YRECLDP%NCLDTOP) THEN + + ZMF = MAX(0.0_JPRB, (PMFU(JL, JK) + PMFD(JL, JK))*ZDTGDP) + ZACUST = ZMF*ZANEWM1 + + DO JM=1,NCLV + IF (.not.LLFALL(JM) .and. IPHASE(JM) > 0) THEN + ZLCUST(JM) = ZMF*ZQXNM1(JM) + ! record total flux for enthalpy budget: + ZCONVSRCE(JM) = ZCONVSRCE(JM) + ZLCUST(JM) + END IF + END DO + + ! Now have to work out how much liquid evaporates at arrival point + ! since there is no prognostic memory for in-cloud humidity, i.e. + ! we always assume cloud is saturated. + + ZDTDP = (ZRDCP*0.5_JPRB*(ZTP1(JK_IM1) + ZTP1(JK_I))) / PAPH(JL, JK) + ZDTFORC = ZDTDP*(PAP(JL, JK) - PAP(JL, JK - 1)) + ![#Note: Diagnostic mixed phase should be replaced below] + ZDQS = ZANEWM1*ZDTFORC*ZDQSMIXDT + + DO JM=1,NCLV + IF (.not.LLFALL(JM) .and. IPHASE(JM) > 0) THEN + ZLFINAL = MAX(0.0_JPRB, ZLCUST(JM) - ZDQS) !lim to zero + ! no supersaturation allowed incloud ---V + ZEVAP = MIN((ZLCUST(JM) - ZLFINAL), ZEVAPLIMMIX) + ! ZEVAP=0.0_JPRB + ZLFINAL = ZLCUST(JM) - ZEVAP + ZLFINALSUM = ZLFINALSUM + ZLFINAL ! sum + + ZSOLQA(JM, JM) = ZSOLQA(JM, JM) + ZLCUST(JM) ! whole sum + ZSOLQA(NCLDQV, JM) = ZSOLQA(NCLDQV, JM) + ZEVAP + ZSOLQA(JM, NCLDQV) = ZSOLQA(JM, NCLDQV) - ZEVAP + END IF + END DO + + ! Reset the cloud contribution if no cloud water survives to this level: + IF (ZLFINALSUM < ZEPSEC) ZACUST = 0.0_JPRB + ZSOLAC = ZSOLAC + ZACUST + + END IF + ! on JK>NCLDTOP + + !--------------------------------------------------------------------- + ! Subsidence sink of cloud to the layer below + ! (Implicit - re. CFL limit on convective mass flux) + !--------------------------------------------------------------------- + + + IF (JK < KLEV) THEN + + ZMFDN = MAX(0.0_JPRB, (PMFU(JL, JK + 1) + PMFD(JL, JK + 1))*ZDTGDP) + + ZSOLAB = ZSOLAB + ZMFDN + ZSOLQB(NCLDQL, NCLDQL) = ZSOLQB(NCLDQL, NCLDQL) + ZMFDN + ZSOLQB(NCLDQI, NCLDQI) = ZSOLQB(NCLDQI, NCLDQI) + ZMFDN + + ! Record sink for cloud budget and enthalpy budget diagnostics + ZCONVSINK(NCLDQL) = ZMFDN + ZCONVSINK(NCLDQI) = ZMFDN + + END IF + + + !---------------------------------------------------------------------- + ! 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + !---------------------------------------------------------------------- + ! NOTE: In default tiedtke scheme this process decreases the cloud + ! area but leaves the specific cloud water content + ! within clouds unchanged + !---------------------------------------------------------------------- + + ! ------------------------------ + ! Define turbulent erosion rate + ! ------------------------------ + ZLDIFDT = YRECLDP%RCLDIFF*PTSPHY !original version + !Increase by factor of 5 for convective points + IF (KTYPE(JL) > 0 .and. PLUDE(JL, JK) > ZEPSEC) ZLDIFDT = YRECLDP%RCLDIFF_CONVI*ZLDIFDT + + ! At the moment, works on mixed RH profile and partitioned ice/liq fraction + ! so that it is similar to previous scheme + ! Should apply RHw for liquid cloud and RHi for ice cloud separately + IF (ZLI > ZEPSEC) THEN + ! Calculate environmental humidity + ! ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + ! & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ! ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ZE = ZLDIFDT*MAX(ZQSMIX - ZQX(NCLDQV), 0.0_JPRB) + ZLEROS = ZA(JK_I)*ZE + ZLEROS = MIN(ZLEROS, ZEVAPLIMMIX) + ZLEROS = MIN(ZLEROS, ZLI) + ZAEROS = ZLEROS / ZLICLD !if linear term + + ! Erosion is -ve LINEAR in L,A + ZSOLAC = ZSOLAC - ZAEROS !linear + + ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) + ZLIQFRAC*ZLEROS + ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) - ZLIQFRAC*ZLEROS + ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) + ZICEFRAC*ZLEROS + ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) - ZICEFRAC*ZLEROS + + END IF + + !---------------------------------------------------------------------- + ! 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + !---------------------------------------------------------------------- + ! calculate dqs/dt + ! Note: For the separate prognostic Qi and Ql, one would ideally use + ! Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + ! forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + ! These would then instantaneous freeze if T<-38C or lead to ice growth + ! by deposition in warmer mixed phase clouds. However, since we do + ! not have a separate prognostic equation for in-cloud humidity or a + ! statistical scheme approach in place, the depositional growth of ice + ! in the mixed phase can not be modelled and we resort to supersaturation + ! wrt ice instanteously converting to ice over one timestep + ! (see Tompkins et al. QJRMS 2007 for details) + ! Thus for the initial implementation the diagnostic mixed phase is + ! retained for the moment, and the level of approximation noted. + !---------------------------------------------------------------------- + + ZDTDP = (ZRDCP*ZTP1(JK_I)) / PAP(JL, JK) + ZDPMXDT = ZDP*ZQTMST + ZMFDN = 0.0_JPRB + IF (JK < KLEV) ZMFDN = PMFU(JL, JK + 1) + PMFD(JL, JK + 1) + ZWTOT = PVERVEL(JL, JK) + 0.5_JPRB*RG*(PMFU(JL, JK) + PMFD(JL, JK) + ZMFDN) + ZWTOT = MIN(ZDPMXDT, MAX(-ZDPMXDT, ZWTOT)) + ZZZDT = PHRSW(JL, JK) + PHRLW(JL, JK) + ZDTDIAB = MIN(ZDPMXDT*ZDTDP, MAX(-ZDPMXDT*ZDTDP, ZZZDT))*PTSPHY + RALFDCP*ZLDEFR + ! Note: ZLDEFR should be set to the difference between the mixed phase functions + ! in the convection and cloud scheme, but this is not calculated, so is zero and + ! the functions must be the same + ZDTFORC = ZDTDP*ZWTOT*PTSPHY + ZDTDIAB + ZQOLD = ZQSMIX + ZTOLD = ZTP1(JK_I) + ZTP1(JK_I) = ZTP1(JK_I) + ZDTFORC + ZTP1(JK_I) = MAX(ZTP1(JK_I), 160.0_JPRB) + LLFLAG = .true. + + ! Formerly a call to CUADJTQ(..., ICALL=5) + ZQP = 1.0_JPRB / PAP(JL, JK) + ZQSAT = FOEEWM(ZTP1(JK_I))*ZQP + ZQSAT = MIN(0.5_JPRB, ZQSAT) + ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND = (ZQSMIX - ZQSAT) / (1.0_JPRB + ZQSAT*ZCOR*FOEDEM(ZTP1(JK_I))) + ZTP1(JK_I) = ZTP1(JK_I) + FOELDCPM(ZTP1(JK_I))*ZCOND + ZQSMIX = ZQSMIX - ZCOND + ZQSAT = FOEEWM(ZTP1(JK_I))*ZQP + ZQSAT = MIN(0.5_JPRB, ZQSAT) + ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND1 = (ZQSMIX - ZQSAT) / (1.0_JPRB + ZQSAT*ZCOR*FOEDEM(ZTP1(JK_I))) + ZTP1(JK_I) = ZTP1(JK_I) + FOELDCPM(ZTP1(JK_I))*ZCOND1 + ZQSMIX = ZQSMIX - ZCOND1 + + ZDQS = ZQSMIX - ZQOLD + ZQSMIX = ZQOLD + ZTP1(JK_I) = ZTOLD + + !---------------------------------------------------------------------- + ! 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + ! ---------------------------------------------------------------------- + ! Erosion term is LINEAR in L + ! Changed to be uniform distribution in cloud region + + + ! Previous function based on DELTA DISTRIBUTION in cloud: + IF (ZDQS > 0.0_JPRB) THEN + ! If subsidence evaporation term is turned off, then need to use updated + ! liquid and cloud here? + ! ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + ZLEVAP = ZA(JK_I)*MIN(ZDQS, ZLICLD) + ZLEVAP = MIN(ZLEVAP, ZEVAPLIMMIX) + ZLEVAP = MIN(ZLEVAP, MAX(ZQSMIX - ZQX(NCLDQV), 0.0_JPRB)) + + ! For first guess call + ZLEVAPL = ZLIQFRAC*ZLEVAP + ZLEVAPI = ZICEFRAC*ZLEVAP + + ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) + ZLIQFRAC*ZLEVAP + ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) - ZLIQFRAC*ZLEVAP + + ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) + ZICEFRAC*ZLEVAP + ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) - ZICEFRAC*ZLEVAP + + END IF + + + !---------------------------------------------------------------------- + ! 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + !---------------------------------------------------------------------- + ! (1) Increase of cloud water in existing clouds + IF (ZA(JK_I) > ZEPSEC .and. ZDQS <= -YRECLDP%RLMIN) THEN + + ZLCOND1 = MAX(-ZDQS, 0.0_JPRB) !new limiter + + !old limiter (significantly improves upper tropospheric humidity rms) + IF (ZA(JK_I) > 0.99_JPRB) THEN + ZCOR = 1.0_JPRB / (1.0_JPRB - RETV*ZQSMIX) + ZCDMAX = (ZQX(NCLDQV) - ZQSMIX) / (1.0_JPRB + ZCOR*ZQSMIX*FOEDEM(ZTP1(JK_I))) + ELSE + ZCDMAX = (ZQX(NCLDQV) - ZA(JK_I)*ZQSMIX) / ZA(JK_I) + END IF + ZLCOND1 = MAX(MIN(ZLCOND1, ZCDMAX), 0.0_JPRB) + ! end old limiter + + ZLCOND1 = ZA(JK_I)*ZLCOND1 + IF (ZLCOND1 < YRECLDP%RLMIN) ZLCOND1 = 0.0_JPRB + + !------------------------------------------------------------------------- + ! All increase goes into liquid unless so cold cloud homogeneously freezes + ! Include new liquid formation in first guess value, otherwise liquid + ! remains at cold temperatures until next timestep. + !------------------------------------------------------------------------- + IF (ZTP1(JK_I) > YRECLDP%RTHOMO) THEN + ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZLCOND1 + ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZLCOND1 + ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZLCOND1 + ELSE + ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZLCOND1 + ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZLCOND1 + ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZLCOND1 + END IF + END IF + + ! (2) Generation of new clouds (da/dt>0) + + + IF (ZDQS <= -YRECLDP%RLMIN .and. ZA(JK_I) < 1.0_JPRB - ZEPSEC) THEN + + !--------------------------- + ! Critical relative humidity + !--------------------------- + ZRHC = YRECLDP%RAMID + ZSIGK = PAP(JL, JK) / PAPH(JL, KLEV + 1) + ! Increase RHcrit to 1.0 towards the surface (eta>0.8) + IF (ZSIGK > 0.8_JPRB) THEN + ZRHC = YRECLDP%RAMID + (1.0_JPRB - YRECLDP%RAMID)*((ZSIGK - 0.8_JPRB) / 0.2_JPRB)**2 + END IF + + ! Commented out for CY37R1 to reduce humidity in high trop and strat + ! ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + ! ZBOTT=ZTRPAUS(JL)+0.2_JPRB + ! IF(ZSIGK < ZBOTT) THEN + ! ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + ! ENDIF + + !--------------------------- + ! Supersaturation options + !--------------------------- + IF (YRECLDP%NSSOPT == 0) THEN + ! No scheme + ZQE = (ZQX(NCLDQV) - ZA(JK_I)*ZQSICE) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ZQE = MAX(0.0_JPRB, ZQE) + ELSE IF (YRECLDP%NSSOPT == 1) THEN + ! Tompkins + ZQE = (ZQX(NCLDQV) - ZA(JK_I)*ZQSICE) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ZQE = MAX(0.0_JPRB, ZQE) + ELSE IF (YRECLDP%NSSOPT == 2) THEN + ! Lohmann and Karcher + ZQE = ZQX(NCLDQV) + ELSE IF (YRECLDP%NSSOPT == 3) THEN + ! Gierens + ZQE = ZQX(NCLDQV) + ZLI + END IF + + IF (ZTP1(JK_I) >= RTT .or. YRECLDP%NSSOPT == 0) THEN + ! No ice supersaturation allowed + ZFAC = 1.0_JPRB + ELSE + ! Ice supersaturation + ZFAC = ZFOKOOP + END IF + + IF (ZQE >= ZRHC*ZQSICE*ZFAC .and. ZQE < ZQSICE*ZFAC) THEN + ! note: not **2 on 1-a term if ZQE is used. + ! Added correction term ZFAC to numerator 15/03/2010 + ZACOND = -((1.0_JPRB - ZA(JK_I))*ZFAC*ZDQS) / MAX(2.0_JPRB*(ZFAC*ZQSICE - ZQE), ZEPSEC) + + ZACOND = MIN(ZACOND, 1.0_JPRB - ZA(JK_I)) !PUT THE LIMITER BACK + + ! Linear term: + ! Added correction term ZFAC 15/03/2010 + ZLCOND2 = -ZFAC*ZDQS*0.5_JPRB*ZACOND !mine linear + + ! new limiter formulation + ZZDL = (2.0_JPRB*(ZFAC*ZQSICE - ZQE)) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ! Added correction term ZFAC 15/03/2010 + IF (ZFAC*ZDQS < -ZZDL) THEN + ! ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + ZLCONDLIM = (ZA(JK_I) - 1.0_JPRB)*ZFAC*ZDQS - ZFAC*ZQSICE + ZQX(NCLDQV) + ZLCOND2 = MIN(ZLCOND2, ZLCONDLIM) + END IF + ZLCOND2 = MAX(ZLCOND2, 0.0_JPRB) + + IF (ZLCOND2 < YRECLDP%RLMIN .or. (1.0_JPRB - ZA(JK_I)) < ZEPSEC) THEN + ZLCOND2 = 0.0_JPRB + ZACOND = 0.0_JPRB + END IF + IF (ZLCOND2 == 0.0_JPRB) ZACOND = 0.0_JPRB + + ! Large-scale generation is LINEAR in A and LINEAR in L + ZSOLAC = ZSOLAC + ZACOND !linear + + !------------------------------------------------------------------------ + ! All increase goes into liquid unless so cold cloud homogeneously freezes + ! Include new liquid formation in first guess value, otherwise liquid + ! remains at cold temperatures until next timestep. + !------------------------------------------------------------------------ + IF (ZTP1(JK_I) > YRECLDP%RTHOMO) THEN + ZSOLQA(NCLDQL, NCLDQV) = ZSOLQA(NCLDQL, NCLDQV) + ZLCOND2 + ZSOLQA(NCLDQV, NCLDQL) = ZSOLQA(NCLDQV, NCLDQL) - ZLCOND2 + ZQXFG(NCLDQL) = ZQXFG(NCLDQL) + ZLCOND2 + ELSE + ! homogeneous freezing + ZSOLQA(NCLDQI, NCLDQV) = ZSOLQA(NCLDQI, NCLDQV) + ZLCOND2 + ZSOLQA(NCLDQV, NCLDQI) = ZSOLQA(NCLDQV, NCLDQI) - ZLCOND2 + ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZLCOND2 + END IF + + END IF + END IF + + !---------------------------------------------------------------------- + ! 3.7 Growth of ice by vapour deposition + !---------------------------------------------------------------------- + ! Following Rotstayn et al. 2001: + ! does not use the ice nuclei number from cloudaer.F90 + ! but rather a simple Meyers et al. 1992 form based on the + ! supersaturation and assuming clouds are saturated with + ! respect to liquid water (well mixed), (or Koop adjustment) + ! Growth considered as sink of liquid water if present so + ! Bergeron-Findeisen adjustment in autoconversion term no longer needed + !---------------------------------------------------------------------- + + !-------------------------------------------------------- + !- + !- Ice deposition following Rotstayn et al. (2001) + !- (monodisperse ice particle size distribution) + !- + !-------------------------------------------------------- + IF (IDEPICE == 1) THEN + + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JK_IM1) < YRECLDP%RCLDTOPCF .and. ZA(JK_I) >= YRECLDP%RCLDTOPCF) THEN + ZCLDTOPDIST = 0.0_JPRB + ELSE + ZCLDTOPDIST = ZCLDTOPDIST + ZDP / ((ZRHO*RG)) + END IF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JK_I) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN + ! T<273K + + ZVPICE = (FOEEICE(ZTP1(JK_I))*RV) / RD + ZVPLIQ = ZVPICE*ZFOKOOP + ZICENUCLEI = 1000.0_JPRB*EXP((12.96_JPRB*(ZVPLIQ - ZVPICE)) / ZVPLIQ - 0.639_JPRB) + + !------------------------------------------------ + ! 2.4e-2 is conductivity of air + ! 8.8 = 700**1/3 = density of ice to the third + !------------------------------------------------ + ZADD = (RLSTT*(RLSTT / ((RV*ZTP1(JK_I))) - 1.0_JPRB)) / ((2.4E-2_JPRB*ZTP1(JK_I))) + ZBDD = (RV*ZTP1(JK_I)*PAP(JL, JK)) / ((2.21_JPRB*ZVPICE)) + ZCVDS = (7.8_JPRB*(ZICENUCLEI / ZRHO)**0.666_JPRB*(ZVPLIQ - ZVPICE)) / ((8.87_JPRB*(ZADD + ZBDD)*ZVPICE)) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO) + + !------------------ + ! new value of ice: + !------------------ + ZINEW = (0.666_JPRB*ZCVDS*PTSPHY + ZICE0**0.666_JPRB)**1.5_JPRB + + !--------------------------- + ! grid-mean deposition rate: + !--------------------------- + ZDEPOS = MAX(ZA(JK_I)*(ZINEW - ZICE0), 0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS = MIN(ZDEPOS, ZQXFG(NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- + ! ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI / 15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST / & + & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(NCLDQI, NCLDQL) = ZSOLQA(NCLDQI, NCLDQL) + ZDEPOS + ZSOLQA(NCLDQL, NCLDQI) = ZSOLQA(NCLDQL, NCLDQI) - ZDEPOS + ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZDEPOS + ZQXFG(NCLDQL) = ZQXFG(NCLDQL) - ZDEPOS + + END IF + + !-------------------------------------------------------- + !- + !- Ice deposition assuming ice PSD + !- + !-------------------------------------------------------- + ELSE IF (IDEPICE == 2) THEN + + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JK_IM1) < YRECLDP%RCLDTOPCF .and. ZA(JK_I) >= YRECLDP%RCLDTOPCF) THEN + ZCLDTOPDIST = 0.0_JPRB + ELSE + ZCLDTOPDIST = ZCLDTOPDIST + ZDP / ((ZRHO*RG)) + END IF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JK_I) < RTT .and. ZQXFG(NCLDQL) > YRECLDP%RLMIN) THEN + ! T<273K + + ZVPICE = (FOEEICE(ZTP1(JK_I))*RV) / RD + ZVPLIQ = ZVPICE*ZFOKOOP + ZICENUCLEI = 1000.0_JPRB*EXP((12.96_JPRB*(ZVPLIQ - ZVPICE)) / ZVPLIQ - 0.639_JPRB) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0 = MAX(ZICECLD, (ZICENUCLEI*YRECLDP%RICEINIT) / ZRHO) + + ! Particle size distribution + ZTCG = 1.0_JPRB + ZFACX1I = 1.0_JPRB + + ZAPLUSB = & + & YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JK_I) + PAP(JL, JK)*YRECLDP%RCL_APB3*ZTP1(JK_I)**3._JPRB + ZCORRFAC = (1.0_JPRB / ZRHO)**0.5_JPRB + ZCORRFAC2 = ((ZTP1(JK_I) / 273.0_JPRB)**1.5_JPRB)*(393.0_JPRB / (ZTP1(JK_I) + 120.0_JPRB)) + + ZPR02 = (ZRHO*ZICE0*YRECLDP%RCL_CONST1I) / ((ZTCG*ZFACX1I)) + + ZTERM1 = ((ZVPLIQ - ZVPICE)*ZTP1(JK_I)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2I*ZFACX1I) / & + & ((ZRHO*ZAPLUSB*ZVPICE)) + ZTERM2 = 0.65_JPRB*YRECLDP%RCL_CONST6I*ZPR02**YRECLDP%RCL_CONST4I + & + & (YRECLDP%RCL_CONST3I*ZCORRFAC**0.5_JPRB*ZRHO**0.5_JPRB*ZPR02**YRECLDP%RCL_CONST5I) / ZCORRFAC2**0.5_JPRB + + ZDEPOS = MAX(ZA(JK_I)*ZTERM1*ZTERM2*PTSPHY, 0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS = MIN(ZDEPOS, ZQXFG(NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI / 15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB - ZINFACTOR)*(YRECLDP%RDEPLIQREFRATE + ZCLDTOPDIST / & + & YRECLDP%RDEPLIQREFDEPTH), 1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(NCLDQI, NCLDQL) = ZSOLQA(NCLDQI, NCLDQL) + ZDEPOS + ZSOLQA(NCLDQL, NCLDQI) = ZSOLQA(NCLDQL, NCLDQI) - ZDEPOS + ZQXFG(NCLDQI) = ZQXFG(NCLDQI) + ZDEPOS + ZQXFG(NCLDQL) = ZQXFG(NCLDQL) - ZDEPOS + END IF + + END IF + ! on IDEPICE + + !###################################################################### + ! 4 *** PRECIPITATION PROCESSES *** + !###################################################################### + + !---------------------------------- + ! revise in-cloud consensate amount + !---------------------------------- + ZTMPA = 1.0_JPRB / MAX(ZA(JK_I), ZEPSEC) + ZLIQCLD = ZQXFG(NCLDQL)*ZTMPA + ZICECLD = ZQXFG(NCLDQI)*ZTMPA + ZLICLD = ZLIQCLD + ZICECLD + + !---------------------------------------------------------------------- + ! 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + ! now that rain, snow, graupel species are prognostic + ! the precipitation flux can be defined directly level by level + ! There is no vertical memory required from the flux variable + !---------------------------------------------------------------------- + + DO JM=1,NCLV + IF (LLFALL(JM) .or. JM == NCLDQI) THEN + !------------------------ + ! source from layer above + !------------------------ + IF (JK > YRECLDP%NCLDTOP) THEN + ZFALLSRCE(JM) = ZPFPLSX(JK_I, JM)*ZDTGDP + ZSOLQA(JM, JM) = ZSOLQA(JM, JM) + ZFALLSRCE(JM) + ZQXFG(JM) = ZQXFG(JM) + ZFALLSRCE(JM) + ! use first guess precip----------V + ZQPRETOT = ZQPRETOT + ZQXFG(JM) + END IF + !------------------------------------------------- + ! sink to next layer, constant fall speed + !------------------------------------------------- + ! if aerosol effect then override + ! note that for T>233K this is the same as above. + IF (YRECLDP%LAERICESED .and. JM == NCLDQI) THEN + ZRE_ICE = PRE_ICE(JL, JK) + ! The exponent value is from + ! Morrison et al. JAS 2005 Appendix + ZVQX(NCLDQI) = 0.002_JPRB*ZRE_ICE**1.0_JPRB + END IF + ZFALL = ZVQX(JM)*ZRHO + !------------------------------------------------- + ! modified by Heymsfield and Iaquinta JAS 2000 + !------------------------------------------------- + ! ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + ! &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + ZFALLSINK(JM) = ZDTGDP*ZFALL + ! Cloud budget diagnostic stored at end as implicit + ! jl + END IF + ! LLFALL + END DO + ! jm + + !--------------------------------------------------------------- + ! Precip cover overlap using MAX-RAN Overlap + ! Since precipitation is now prognostic we must + ! 1) apply an arbitrary minimum coverage (0.3) if precip>0 + ! 2) abandon the 2-flux clr/cld treatment + ! 3) Thus, since we have no memory of the clear sky precip + ! fraction, we mimic the previous method by reducing + ! ZCOVPTOT(JL), which has the memory, proportionally with + ! the precip evaporation rate, taking cloud fraction + ! into account + ! #3 above leads to much smoother vertical profiles of + ! precipitation fraction than the Klein-Jakob scheme which + ! monotonically increases precip fraction and then resets + ! it to zero in a step function once clear-sky precip reaches + ! zero. + !--------------------------------------------------------------- + IF (ZQPRETOT > ZEPSEC) THEN + ZCOVPTOT = 1.0_JPRB - ((1.0_JPRB - ZCOVPTOT)*(1.0_JPRB - MAX(ZA(JK_I), ZA(JK_IM1)))) / (1.0_JPRB - MIN(ZA(JK_IM1), 1.0_JPRB - 1.E-06_JPRB)) + ZCOVPTOT = MAX(ZCOVPTOT, YRECLDP%RCOVPMIN) + ZCOVPCLR = MAX(0.0_JPRB, ZCOVPTOT - ZA(JK_I)) ! clear sky proportion + ZRAINCLD = ZQXFG(NCLDQR) / ZCOVPTOT + ZSNOWCLD = ZQXFG(NCLDQS) / ZCOVPTOT + ZCOVPMAX = MAX(ZCOVPTOT, ZCOVPMAX) + ELSE + ZRAINCLD = 0.0_JPRB + ZSNOWCLD = 0.0_JPRB + ZCOVPTOT = 0.0_JPRB ! no flux - reset cover + ZCOVPCLR = 0.0_JPRB ! reset clear sky proportion + ZCOVPMAX = 0.0_JPRB ! reset max cover for ZZRH calc + END IF + + !---------------------------------------------------------------------- + ! 4.3a AUTOCONVERSION TO SNOW + !---------------------------------------------------------------------- + + IF (ZTP1(JK_I) <= RTT) THEN + !----------------------------------------------------- + ! Snow Autoconversion rate follow Lin et al. 1983 + !----------------------------------------------------- + IF (ZICECLD > ZEPSEC) THEN + + ZZCO = PTSPHY*YRECLDP%RSNOWLIN1*EXP(YRECLDP%RSNOWLIN2*(ZTP1(JK_I) - RTT)) + + IF (YRECLDP%LAERICEAUTO) THEN + ZLCRIT = PICRIT_AER(JL, JK) + ! 0.3 = N**0.333 with N=0.027 + ZZCO = ZZCO*(YRECLDP%RNICE / PNICE(JL, JK))**0.333_JPRB + ELSE + ZLCRIT = YRECLDP%RLCRITSNOW + END IF + + ZSNOWAUT = ZZCO*(1.0_JPRB - EXP(-(ZICECLD / ZLCRIT)**2)) + ZSOLQB(NCLDQS, NCLDQI) = ZSOLQB(NCLDQS, NCLDQI) + ZSNOWAUT + + END IF + END IF + + !---------------------------------------------------------------------- + ! 4.3b AUTOCONVERSION WARM CLOUDS + ! Collection and accretion will require separate treatment + ! but for now we keep this simple treatment + !---------------------------------------------------------------------- + + IF (ZLIQCLD > ZEPSEC) THEN + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Sundqvist (1989) + !- + !-------------------------------------------------------- + IF (IWARMRAIN == 1) THEN + + ZZCO = YRECLDP%RKCONV*PTSPHY + + IF (YRECLDP%LAERLIQAUTOLSP) THEN + ZLCRIT = PLCRIT_AER(JL, JK) + ! 0.3 = N**0.333 with N=125 cm-3 + ZZCO = ZZCO*(YRECLDP%RCCN / PCCN(JL, JK))**0.333_JPRB + ELSE + ! Modify autoconversion threshold dependent on: + ! land (polluted, high CCN, smaller droplets, higher threshold) + ! sea (clean, low CCN, larger droplets, lower threshold) + IF (PLSM(JL) > 0.5_JPRB) THEN + ZLCRIT = YRECLDP%RCLCRIT_LAND ! land + ELSE + ZLCRIT = YRECLDP%RCLCRIT_SEA ! ocean + END IF + END IF + + !------------------------------------------------------------------ + ! Parameters for cloud collection by rain and snow. + ! Note that with new prognostic variable it is now possible + ! to REPLACE this with an explicit collection parametrization + !------------------------------------------------------------------ + ZPRECIP = (ZPFPLSX(JK_I, NCLDQS) + ZPFPLSX(JK_I, NCLDQR)) / MAX(ZEPSEC, ZCOVPTOT) + ZCFPR = 1.0_JPRB + YRECLDP%RPRC1*SQRT(MAX(ZPRECIP, 0.0_JPRB)) + ! ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + ! &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + IF (YRECLDP%LAERLIQCOLL) THEN + ! 5.0 = N**0.333 with N=125 cm-3 + ZCFPR = ZCFPR*(YRECLDP%RCCN / PCCN(JL, JK))**0.333_JPRB + END IF + + ZZCO = ZZCO*ZCFPR + ZLCRIT = ZLCRIT / MAX(ZCFPR, ZEPSEC) + + IF (ZLIQCLD / ZLCRIT < 20.0_JPRB) THEN + ! Security for exp for some compilers + ZRAINAUT = ZZCO*(1.0_JPRB - EXP(-(ZLIQCLD / ZLCRIT)**2)) + ELSE + ZRAINAUT = ZZCO + END IF + + ! rain freezes instantly + IF (ZTP1(JK_I) <= RTT) THEN + ZSOLQB(NCLDQS, NCLDQL) = ZSOLQB(NCLDQS, NCLDQL) + ZRAINAUT + ELSE + ZSOLQB(NCLDQR, NCLDQL) = ZSOLQB(NCLDQR, NCLDQL) + ZRAINAUT + END IF + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Khairoutdinov and Kogan (2000) + !- + !-------------------------------------------------------- + ELSE IF (IWARMRAIN == 2) THEN + + IF (PLSM(JL) > 0.5_JPRB) THEN + ! land + ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_LAND + ZLCRIT = YRECLDP%RCLCRIT_LAND + ELSE + ! ocean + ZCONST = YRECLDP%RCL_KK_CLOUD_NUM_SEA + ZLCRIT = YRECLDP%RCLCRIT_SEA + END IF + + IF (ZLIQCLD > ZLCRIT) THEN + + ZRAINAUT = 1.5_JPRB*ZA(JK_I)*PTSPHY*YRECLDP%RCL_KKAAU*ZLIQCLD**YRECLDP%RCL_KKBAUQ*ZCONST**YRECLDP%RCL_KKBAUN + + ZRAINAUT = MIN(ZRAINAUT, ZQXFG(NCLDQL)) + IF (ZRAINAUT < ZEPSEC) ZRAINAUT = 0.0_JPRB + + ZRAINACC = 2.0_JPRB*ZA(JK_I)*PTSPHY*YRECLDP%RCL_KKAAC*(ZLIQCLD*ZRAINCLD)**YRECLDP%RCL_KKBAC + + ZRAINACC = MIN(ZRAINACC, ZQXFG(NCLDQL)) + IF (ZRAINACC < ZEPSEC) ZRAINACC = 0.0_JPRB + + ELSE + ZRAINAUT = 0.0_JPRB + ZRAINACC = 0.0_JPRB + END IF + + ! If temperature < 0, then autoconversion produces snow rather than rain + ! Explicit + IF (ZTP1(JK_I) <= RTT) THEN + ZSOLQA(NCLDQS, NCLDQL) = ZSOLQA(NCLDQS, NCLDQL) + ZRAINAUT + ZSOLQA(NCLDQS, NCLDQL) = ZSOLQA(NCLDQS, NCLDQL) + ZRAINACC + ZSOLQA(NCLDQL, NCLDQS) = ZSOLQA(NCLDQL, NCLDQS) - ZRAINAUT + ZSOLQA(NCLDQL, NCLDQS) = ZSOLQA(NCLDQL, NCLDQS) - ZRAINACC + ELSE + ZSOLQA(NCLDQR, NCLDQL) = ZSOLQA(NCLDQR, NCLDQL) + ZRAINAUT + ZSOLQA(NCLDQR, NCLDQL) = ZSOLQA(NCLDQR, NCLDQL) + ZRAINACC + ZSOLQA(NCLDQL, NCLDQR) = ZSOLQA(NCLDQL, NCLDQR) - ZRAINAUT + ZSOLQA(NCLDQL, NCLDQR) = ZSOLQA(NCLDQL, NCLDQR) - ZRAINACC + END IF + + END IF + ! on IWARMRAIN + + END IF + ! on ZLIQCLD > ZEPSEC + + + !---------------------------------------------------------------------- + ! RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + ! only active if T<0degC and supercooled liquid water is present + ! AND if not Sundquist autoconversion (as this includes riming) + !---------------------------------------------------------------------- + IF (IWARMRAIN > 1) THEN + + IF (ZTP1(JK_I) <= RTT .and. ZLIQCLD > ZEPSEC) THEN + + ! Fallspeed air density correction + ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4_JPRB + + !------------------------------------------------------------------ + ! Riming of snow by cloud water - implicit in lwc + !------------------------------------------------------------------ + IF (ZSNOWCLD > ZEPSEC .and. ZCOVPTOT > 0.01_JPRB) THEN + + ! Calculate riming term + ! Factor of liq water taken out because implicit + ZSNOWRIME = & + & 0.3_JPRB*ZCOVPTOT*PTSPHY*YRECLDP%RCL_CONST7S*ZFALLCORR*(ZRHO*ZSNOWCLD*YRECLDP%RCL_CONST1S)**YRECLDP%RCL_CONST8S + + ! Limit snow riming term + ZSNOWRIME = MIN(ZSNOWRIME, 1.0_JPRB) + + ZSOLQB(NCLDQS, NCLDQL) = ZSOLQB(NCLDQS, NCLDQL) + ZSNOWRIME + + END IF + + !------------------------------------------------------------------ + ! Riming of ice by cloud water - implicit in lwc + ! NOT YET ACTIVE + !------------------------------------------------------------------ + ! IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + ! + ! ! Calculate riming term + ! ! Factor of liq water taken out because implicit + ! ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + ! & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + ! + ! ! Limit ice riming term + ! ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + ! + ! ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + ! + ! ENDIF + END IF + + END IF + ! on IWARMRAIN > 1 + + + !---------------------------------------------------------------------- + ! 4.4a MELTING OF SNOW and ICE + ! with new implicit solver this also has to treat snow or ice + ! precipitating from the level above... i.e. local ice AND flux. + ! in situ ice and snow: could arise from LS advection or warming + ! falling ice and snow: arrives by precipitation process + !---------------------------------------------------------------------- + + ZICETOT = ZQXFG(NCLDQI) + ZQXFG(NCLDQS) + ZMELTMAX = 0.0_JPRB + + ! If there are frozen hydrometeors present and dry-bulb temperature > 0degC + IF (ZICETOT > ZEPSEC .and. ZTP1(JK_I) > RTT) THEN + + ! Calculate subsaturation + ZSUBSAT = MAX(ZQSICE - ZQX(NCLDQV), 0.0_JPRB) + + ! Calculate difference between dry-bulb (ZTP1) and the temperature + ! at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + ! Melting only occurs if the wet-bulb temperature >0 + ! i.e. warming of ice particle due to melting > cooling + ! due to evaporation. + ZTDMTW0 = ZTP1(JK_I) - RTT - ZSUBSAT*(ZTW1 + ZTW2*(PAP(JL, JK) - ZTW3) - ZTW4*(ZTP1(JK_I) - ZTW5)) + ! Not implicit yet... + ! Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*ZTDMTW0)) / YRECLDP%RTAUMEL) + ZMELTMAX = MAX(ZTDMTW0*ZCONS1*ZRLDCP, 0.0_JPRB) + END IF + + ! Loop over frozen hydrometeors (ice, snow) + DO JM=1,NCLV + IF (IPHASE(JM) == 2) THEN + JN = IMELT(JM) + IF (ZMELTMAX > ZEPSEC .and. ZICETOT > ZEPSEC) THEN + ! Apply melting in same proportion as frozen hydrometeor fractions + ZALFA = ZQXFG(JM) / ZICETOT + ZMELT = MIN(ZQXFG(JM), ZALFA*ZMELTMAX) + ! needed in first guess + ! This implies that zqpretot has to be recalculated below + ! since is not conserved here if ice falls and liquid doesn't + ZQXFG(JM) = ZQXFG(JM) - ZMELT + ZQXFG(JN) = ZQXFG(JN) + ZMELT + ZSOLQA(JN, JM) = ZSOLQA(JN, JM) + ZMELT + ZSOLQA(JM, JN) = ZSOLQA(JM, JN) - ZMELT + END IF + END IF + END DO + + !---------------------------------------------------------------------- + ! 4.4b FREEZING of RAIN + !---------------------------------------------------------------------- + + ! If rain present + IF (ZQX(NCLDQR) > ZEPSEC) THEN + + IF (ZTP1(JK_I) <= RTT .and. ZTP1(JK_IM1) > RTT) THEN + ! Base of melting layer/top of refreezing layer so + ! store rain/snow fraction for precip type diagnosis + ! If mostly rain, then supercooled rain slow to freeze + ! otherwise faster to freeze (snow or ice pellets) + ZQPRETOT = MAX(ZQX(NCLDQS) + ZQX(NCLDQR), ZEPSEC) + PRAINFRAC_TOPRFZ(JL) = ZQX(NCLDQR) / ZQPRETOT + IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN + LLRAINLIQ = .true. + ELSE + LLRAINLIQ = .false. + END IF + END IF + + ! If temperature less than zero + IF (ZTP1(JK_I) < RTT) THEN + + IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN + + ! Majority of raindrops completely melted + ! Refreezing is by slow heterogeneous freezing + + ! Slope of rain particle size distribution + ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZQX(NCLDQR))))**YRECLDP%RCL_FAC2 + + ! Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ZTEMP = YRECLDP%RCL_FZRAB*(ZTP1(JK_I) - RTT) + ZFRZ = PTSPHY*(YRECLDP%RCL_CONST5R / ZRHO)*(EXP(ZTEMP) - 1._JPRB)*ZLAMBDA**YRECLDP%RCL_CONST6R + ZFRZMAX = MAX(ZFRZ, 0.0_JPRB) + + ELSE + + ! Majority of raindrops only partially melted + ! Refreeze with a shorter timescale (reverse of melting...for now) + + ZCONS1 = ABS((PTSPHY*(1.0_JPRB + 0.5_JPRB*(RTT - ZTP1(JK_I)))) / YRECLDP%RTAUMEL) + ZFRZMAX = MAX((RTT - ZTP1(JK_I))*ZCONS1*ZRLDCP, 0.0_JPRB) + + END IF + + IF (ZFRZMAX > ZEPSEC) THEN + ZFRZ = MIN(ZQX(NCLDQR), ZFRZMAX) + ZSOLQA(NCLDQS, NCLDQR) = ZSOLQA(NCLDQS, NCLDQR) + ZFRZ + ZSOLQA(NCLDQR, NCLDQS) = ZSOLQA(NCLDQR, NCLDQS) - ZFRZ + END IF + END IF + + END IF + + + !---------------------------------------------------------------------- + ! 4.4c FREEZING of LIQUID + !---------------------------------------------------------------------- + ! not implicit yet... + ZFRZMAX = MAX((YRECLDP%RTHOMO - ZTP1(JK_I))*ZRLDCP, 0.0_JPRB) + + JM = NCLDQL + JN = IMELT(JM) + IF (ZFRZMAX > ZEPSEC .and. ZQXFG(JM) > ZEPSEC) THEN + ZFRZ = MIN(ZQXFG(JM), ZFRZMAX) + ZSOLQA(JN, JM) = ZSOLQA(JN, JM) + ZFRZ + ZSOLQA(JM, JN) = ZSOLQA(JM, JN) - ZFRZ + END IF + + !---------------------------------------------------------------------- + ! 4.5 EVAPORATION OF RAIN/SNOW + !---------------------------------------------------------------------- + + !---------------------------------------- + ! Rain evaporation scheme from Sundquist + !---------------------------------------- + IF (IEVAPRAIN == 1) THEN + + ! Rain + + + ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB) + + ZQE = (ZQX(NCLDQV) - ZA(JK_I)*ZQSLIQ) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + !--------------------------------------------- + ! humidity in moistest ZCOVPCLR part of domain + !--------------------------------------------- + ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSLIQ)) + LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQR) > ZEPSEC .and. ZQE < ZZRH*ZQSLIQ + + IF (LLO1) THEN + ! note: zpreclr is a rain flux + ZPRECLR = (ZQXFG(NCLDQR)*ZCOVPCLR) / SIGN(MAX(ABS(ZCOVPTOT*ZDTGDP), ZEPSILON), ZCOVPTOT*ZDTGDP) + + !-------------------------------------- + ! actual microphysics formula in zbeta + !-------------------------------------- + + ZBETA1 = ((SQRT(PAP(JL, JK) / PAPH(JL, KLEV + 1)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC) + + ZBETA = RG*YRECLDP%RPECONS*0.5_JPRB*ZBETA1**0.5777_JPRB + + ZDENOM = 1.0_JPRB + ZBETA*PTSPHY*ZCORQSLIQ + ZDPR = ((ZCOVPCLR*ZBETA*(ZQSLIQ - ZQE)) / ZDENOM)*ZDP*ZRG_R + ZDPEVAP = ZDPR*ZDTGDP + + !--------------------------------------------------------- + ! add evaporation term to explicit sink. + ! this has to be explicit since if treated in the implicit + ! term evaporation can not reduce rain to zero and model + ! produces small amounts of rainfall everywhere. + !--------------------------------------------------------- + + ! Evaporate rain + ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQR)) + + ZSOLQA(NCLDQV, NCLDQR) = ZSOLQA(NCLDQV, NCLDQR) + ZEVAP + ZSOLQA(NCLDQR, NCLDQV) = ZSOLQA(NCLDQR, NCLDQV) - ZEVAP + + !------------------------------------------------------------- + ! Reduce the total precip coverage proportional to evaporation + ! to mimic the previous scheme which had a diagnostic + ! 2-flux treatment, abandoned due to the new prognostic precip + !------------------------------------------------------------- + ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK_I))*ZEVAP) / ZQXFG(NCLDQR))) + + ! Update fg field + ZQXFG(NCLDQR) = ZQXFG(NCLDQR) - ZEVAP + + END IF + + + !--------------------------------------------------------- + ! Rain evaporation scheme based on Abel and Boutle (2013) + !--------------------------------------------------------- + ELSE IF (IEVAPRAIN == 2) THEN + + + !----------------------------------------------------------------------- + ! Calculate relative humidity limit for rain evaporation + ! to avoid cloud formation and saturation of the grid box + !----------------------------------------------------------------------- + ! Limit RH for rain evaporation dependent on precipitation fraction + ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB) + + ! Critical relative humidity + !ZRHC=RAMID + !ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + ! Increase RHcrit to 1.0 towards the surface (eta>0.8) + !IF(ZSIGK > 0.8_JPRB) THEN + ! ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + !ENDIF + !ZZRH = MIN(ZRHC,ZZRH) + + ! Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + ZZRH = MIN(0.8_JPRB, ZZRH) + + ZQE = MAX(0.0_JPRB, MIN(ZQX(NCLDQV), ZQSLIQ)) + + LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQR) > ZEPSEC .and. ZQE < ZZRH*ZQSLIQ + + IF (LLO1) THEN + + !------------------------------------------- + ! Abel and Boutle (2012) evaporation + !------------------------------------------- + ! Calculate local precipitation (kg/kg) + ZPRECLR = ZQXFG(NCLDQR) / ZCOVPTOT + + ! Fallspeed air density correction + ZFALLCORR = (YRECLDP%RDENSREF / ZRHO)**0.4 + + ! Saturation vapour pressure with respect to liquid phase + ZESATLIQ = (RV / RD)*FOEELIQ(ZTP1(JK_I)) + + ! Slope of particle size distribution + ZLAMBDA = (YRECLDP%RCL_FAC1 / ((ZRHO*ZPRECLR)))**YRECLDP%RCL_FAC2 ! ZPRECLR=kg/kg + + ZEVAP_DENOM = YRECLDP%RCL_CDENOM1*ZESATLIQ - YRECLDP%RCL_CDENOM2*ZTP1(JK_I)*ZESATLIQ + YRECLDP%RCL_CDENOM3*ZTP1( & + & JK)**3._JPRB*PAP(JL, JK) + + ! Temperature dependent conductivity + ZCORR2 = ((ZTP1(JK_I) / 273._JPRB)**1.5_JPRB*393._JPRB) / (ZTP1(JK_I) + 120._JPRB) + ZKA = YRECLDP%RCL_KA273*ZCORR2 + + ZSUBSAT = MAX(ZZRH*ZQSLIQ - ZQE, 0.0_JPRB) + + ZBETA = (0.5_JPRB / ZQSLIQ)*ZTP1(JK_I)**2._JPRB*ZESATLIQ*YRECLDP%RCL_CONST1R*(ZCORR2 / & + & ZEVAP_DENOM)*(0.78_JPRB / (ZLAMBDA**YRECLDP%RCL_CONST4R) + (YRECLDP%RCL_CONST2R*(ZRHO*ZFALLCORR)**0.5_JPRB) / & + & ((ZCORR2**0.5_JPRB*ZLAMBDA**YRECLDP%RCL_CONST3R))) + + ZDENOM = 1.0_JPRB + ZBETA*PTSPHY !*ZCORQSLIQ(JL) + ZDPEVAP = (ZCOVPCLR*ZBETA*PTSPHY*ZSUBSAT) / ZDENOM + + !--------------------------------------------------------- + ! Add evaporation term to explicit sink. + ! this has to be explicit since if treated in the implicit + ! term evaporation can not reduce rain to zero and model + ! produces small amounts of rainfall everywhere. + !--------------------------------------------------------- + + ! Limit rain evaporation + ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQR)) + + ZSOLQA(NCLDQV, NCLDQR) = ZSOLQA(NCLDQV, NCLDQR) + ZEVAP + ZSOLQA(NCLDQR, NCLDQV) = ZSOLQA(NCLDQR, NCLDQV) - ZEVAP + + !------------------------------------------------------------- + ! Reduce the total precip coverage proportional to evaporation + ! to mimic the previous scheme which had a diagnostic + ! 2-flux treatment, abandoned due to the new prognostic precip + !------------------------------------------------------------- + ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK_I))*ZEVAP) / ZQXFG(NCLDQR))) + + ! Update fg field + ZQXFG(NCLDQR) = ZQXFG(NCLDQR) - ZEVAP + + END IF + + END IF + ! on IEVAPRAIN + + !---------------------------------------------------------------------- + ! 4.5 EVAPORATION OF SNOW + !---------------------------------------------------------------------- + ! Snow + IF (IEVAPSNOW == 1) THEN + + ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB) + ZQE = (ZQX(NCLDQV) - ZA(JK_I)*ZQSICE) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + + !--------------------------------------------- + ! humidity in moistest ZCOVPCLR part of domain + !--------------------------------------------- + ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSICE)) + LLO1 = ZCOVPCLR > ZEPSEC .and. ZQXFG(NCLDQS) > ZEPSEC .and. ZQE < ZZRH*ZQSICE + + IF (LLO1) THEN + ! note: zpreclr is a rain flux a + ZPRECLR = (ZQXFG(NCLDQS)*ZCOVPCLR) / SIGN(MAX(ABS(ZCOVPTOT*ZDTGDP), ZEPSILON), ZCOVPTOT*ZDTGDP) + + !-------------------------------------- + ! actual microphysics formula in zbeta + !-------------------------------------- + + ZBETA1 = ((SQRT(PAP(JL, JK) / PAPH(JL, KLEV + 1)) / YRECLDP%RVRFACTOR)*ZPRECLR) / MAX(ZCOVPCLR, ZEPSEC) + + ZBETA = RG*YRECLDP%RPECONS*ZBETA1**0.5777_JPRB + + ZDENOM = 1.0_JPRB + ZBETA*PTSPHY*ZCORQSICE + ZDPR = ((ZCOVPCLR*ZBETA*(ZQSICE - ZQE)) / ZDENOM)*ZDP*ZRG_R + ZDPEVAP = ZDPR*ZDTGDP + + !--------------------------------------------------------- + ! add evaporation term to explicit sink. + ! this has to be explicit since if treated in the implicit + ! term evaporation can not reduce snow to zero and model + ! produces small amounts of snowfall everywhere. + !--------------------------------------------------------- + + ! Evaporate snow + ZEVAP = MIN(ZDPEVAP, ZQXFG(NCLDQS)) + + ZSOLQA(NCLDQV, NCLDQS) = ZSOLQA(NCLDQV, NCLDQS) + ZEVAP + ZSOLQA(NCLDQS, NCLDQV) = ZSOLQA(NCLDQS, NCLDQV) - ZEVAP + + !------------------------------------------------------------- + ! Reduce the total precip coverage proportional to evaporation + ! to mimic the previous scheme which had a diagnostic + ! 2-flux treatment, abandoned due to the new prognostic precip + !------------------------------------------------------------- + ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK_I))*ZEVAP) / ZQXFG(NCLDQS))) + + !Update first guess field + ZQXFG(NCLDQS) = ZQXFG(NCLDQS) - ZEVAP + + END IF + !--------------------------------------------------------- + ELSE IF (IEVAPSNOW == 2) THEN + + + + !----------------------------------------------------------------------- + ! Calculate relative humidity limit for snow evaporation + !----------------------------------------------------------------------- + ZZRH = YRECLDP%RPRECRHMAX + ((1.0_JPRB - YRECLDP%RPRECRHMAX)*ZCOVPMAX) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + ZZRH = MIN(MAX(ZZRH, YRECLDP%RPRECRHMAX), 1.0_JPRB) + ZQE = (ZQX(NCLDQV) - ZA(JK_I)*ZQSICE) / MAX(ZEPSEC, 1.0_JPRB - ZA(JK_I)) + + !--------------------------------------------- + ! humidity in moistest ZCOVPCLR part of domain + !--------------------------------------------- + ZQE = MAX(0.0_JPRB, MIN(ZQE, ZQSICE)) + LLO1 = ZCOVPCLR > ZEPSEC .and. ZQX(NCLDQS) > ZEPSEC .and. ZQE < ZZRH*ZQSICE + + IF (LLO1) THEN + + ! Calculate local precipitation (kg/kg) + ZPRECLR = ZQX(NCLDQS) / ZCOVPTOT + ZVPICE = (FOEEICE(ZTP1(JK_I))*RV) / RD + + ! Particle size distribution + ! ZTCG increases Ni with colder temperatures - essentially a + ! Fletcher or Meyers scheme? + ZTCG = 1.0_JPRB !v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + ! ZFACX1I modification is based on Andrew Barrett's results + ZFACX1S = 1.0_JPRB !v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + ZAPLUSB = YRECLDP%RCL_APB1*ZVPICE - YRECLDP%RCL_APB2*ZVPICE*ZTP1(JK_I) + PAP(JL, JK)*YRECLDP%RCL_APB3*ZTP1(JK_I)**3 + ZCORRFAC = (1.0 / ZRHO)**0.5 + ZCORRFAC2 = ((ZTP1(JK_I) / 273.0)**1.5)*(393.0 / (ZTP1(JK_I) + 120.0)) + + ZPR02 = (ZRHO*ZPRECLR*YRECLDP%RCL_CONST1S) / ((ZTCG*ZFACX1S)) + + ZTERM1 = ((ZQSICE - ZQE)*ZTP1(JK_I)**2*ZVPICE*ZCORRFAC2*ZTCG*YRECLDP%RCL_CONST2S*ZFACX1S) / & + & ((ZRHO*ZAPLUSB*ZQSICE)) + ZTERM2 = 0.65*YRECLDP%RCL_CONST6S*ZPR02**YRECLDP%RCL_CONST4S + & + & (YRECLDP%RCL_CONST3S*ZCORRFAC**0.5*ZRHO**0.5*ZPR02**YRECLDP%RCL_CONST5S) / ZCORRFAC2**0.5 + + ZDPEVAP = MAX(ZCOVPCLR*ZTERM1*ZTERM2*PTSPHY, 0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit evaporation to snow amount + !-------------------------------------------------------------------- + ZEVAP = MIN(ZDPEVAP, ZEVAPLIMICE) + ZEVAP = MIN(ZEVAP, ZQX(NCLDQS)) + + + ZSOLQA(NCLDQV, NCLDQS) = ZSOLQA(NCLDQV, NCLDQS) + ZEVAP + ZSOLQA(NCLDQS, NCLDQV) = ZSOLQA(NCLDQS, NCLDQV) - ZEVAP + + !------------------------------------------------------------- + ! Reduce the total precip coverage proportional to evaporation + ! to mimic the previous scheme which had a diagnostic + ! 2-flux treatment, abandoned due to the new prognostic precip + !------------------------------------------------------------- + ZCOVPTOT = MAX(YRECLDP%RCOVPMIN, ZCOVPTOT - MAX(0.0_JPRB, ((ZCOVPTOT - ZA(JK_I))*ZEVAP) / ZQX(NCLDQS))) + + !Update first guess field + ZQXFG(NCLDQS) = ZQXFG(NCLDQS) - ZEVAP + + END IF + + END IF + ! on IEVAPSNOW + + !-------------------------------------- + ! Evaporate small precipitation amounts + !-------------------------------------- + DO JM=1,NCLV + IF (LLFALL(JM)) THEN + IF (ZQXFG(JM) < YRECLDP%RLMIN) THEN + ZSOLQA(NCLDQV, JM) = ZSOLQA(NCLDQV, JM) + ZQXFG(JM) + ZSOLQA(JM, NCLDQV) = ZSOLQA(JM, NCLDQV) - ZQXFG(JM) + END IF + END IF + END DO + + !###################################################################### + ! 5.0 *** SOLVERS FOR A AND L *** + ! now use an implicit solution rather than exact solution + ! solver is forward in time, upstream difference for advection + !###################################################################### + + !--------------------------- + ! 5.1 solver for cloud cover + !--------------------------- + ZANEW = (ZA(JK_I) + ZSOLAC) / (1.0_JPRB + ZSOLAB) + ZANEW = MIN(ZANEW, 1.0_JPRB) + IF (ZANEW < YRECLDP%RAMIN) ZANEW = 0.0_JPRB + ZDA = ZANEW - ZAORIG + !--------------------------------- + ! variables needed for next level + !--------------------------------- + ZANEWM1 = ZANEW + + !-------------------------------- + ! 5.2 solver for the microphysics + !-------------------------------- + + !-------------------------------------------------------------- + ! Truncate explicit sinks to avoid negatives + ! Note: Species are treated in the order in which they run out + ! since the clipping will alter the balance for the other vars + !-------------------------------------------------------------- + + DO JM=1,NCLV +!$claw nodep + DO JN=1,NCLV + LLINDEX3(JN, JM) = .false. + END DO + ZSINKSUM(JM) = 0.0_JPRB + END DO + + !---------------------------- + ! collect sink terms and mark + !---------------------------- + DO JM=1,NCLV + DO JN=1,NCLV + ZSINKSUM(JM) = ZSINKSUM(JM) - ZSOLQA(JM, JN) ! +ve total is bad + END DO + END DO + + !--------------------------------------- + ! calculate overshoot and scaling factor + !--------------------------------------- + DO JM=1,NCLV + ZMAX = MAX(ZQX(JM), ZEPSEC) + ZRAT = MAX(ZSINKSUM(JM), ZMAX) + ZRATIO(JM) = ZMAX / ZRAT + END DO + + !-------------------------------------------- + ! scale the sink terms, in the correct order, + ! recalculating the scale factor each time + !-------------------------------------------- + DO JM=1,NCLV + ZSINKSUM(JM) = 0.0_JPRB + END DO + + !---------------- + ! recalculate sum + !---------------- + DO JM=1,NCLV + PSUM_SOLQA = 0.0 + DO JN=1,NCLV + PSUM_SOLQA = PSUM_SOLQA + ZSOLQA(JM, JN) + END DO + ! ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + ZSINKSUM(JM) = ZSINKSUM(JM) - PSUM_SOLQA + !--------------------------- + ! recalculate scaling factor + !--------------------------- + ZMM = MAX(ZQX(JM), ZEPSEC) + ZRR = MAX(ZSINKSUM(JM), ZMM) + ZRATIO(JM) = ZMM / ZRR + !------ + ! scale + !------ + ZZRATIO = ZRATIO(JM) + !DIR$ IVDEP + !DIR$ PREFERVECTOR + DO JN=1,NCLV + IF (ZSOLQA(JM, JN) < 0.0_JPRB) THEN + ZSOLQA(JM, JN) = ZSOLQA(JM, JN)*ZZRATIO + ZSOLQA(JN, JM) = ZSOLQA(JN, JM)*ZZRATIO + END IF + END DO + END DO + + !-------------------------------------------------------------- + ! 5.2.2 Solver + !------------------------ + + !------------------------ + ! set the LHS of equation + !------------------------ + DO JM=1,NCLV + DO JN=1,NCLV + !---------------------------------------------- + ! diagonals: microphysical sink terms+transport + !---------------------------------------------- + IF (JN == JM) THEN + ZQLHS(JN, JM) = 1.0_JPRB + ZFALLSINK(JM) + DO JO=1,NCLV + ZQLHS(JN, JM) = ZQLHS(JN, JM) + ZSOLQB(JO, JN) + END DO + !------------------------------------------ + ! non-diagonals: microphysical source terms + !------------------------------------------ + ELSE + ZQLHS(JN, JM) = -ZSOLQB(JN, JM) ! here is the delta T - missing from doc. + END IF + END DO + END DO + + !------------------------ + ! set the RHS of equation + !------------------------ + DO JM=1,NCLV + !--------------------------------- + ! sum the explicit source and sink + !--------------------------------- + ZEXPLICIT = 0.0_JPRB + DO JN=1,NCLV + ZEXPLICIT = ZEXPLICIT + ZSOLQA(JM, JN) ! sum over middle index + END DO + ZQXN(JM) = ZQX(JM) + ZEXPLICIT + END DO + + !----------------------------------- + ! *** solve by LU decomposition: *** + !----------------------------------- + + ! Note: This fast way of solving NCLVxNCLV system + ! assumes a good behaviour (i.e. non-zero diagonal + ! terms with comparable orders) of the matrix stored + ! in ZQLHS. For the moment this is the case but + ! be aware to preserve it when doing eventual + ! modifications. + + ! Non pivoting recursive factorization + DO JN=1,NCLV - 1 + ! number of steps + DO JM=JN + 1,NCLV + ! row index + ZQLHS(JM, JN) = ZQLHS(JM, JN) / ZQLHS(JN, JN) + DO IK=JN + 1,NCLV + ! column index + ZQLHS(JM, IK) = ZQLHS(JM, IK) - ZQLHS(JM, JN)*ZQLHS(JN, IK) + END DO + END DO + END DO + + ! Backsubstitution + ! step 1 + DO JN=2,NCLV + DO JM=1,JN - 1 + ZQXN(JN) = ZQXN(JN) - ZQLHS(JN, JM)*ZQXN(JM) + END DO + END DO + ! step 2 + ZQXN(NCLV) = ZQXN(NCLV) / ZQLHS(NCLV, NCLV) + DO JN=NCLV - 1,1,-1 + DO JM=JN + 1,NCLV + ZQXN(JN) = ZQXN(JN) - ZQLHS(JN, JM)*ZQXN(JM) + END DO + ZQXN(JN) = ZQXN(JN) / ZQLHS(JN, JN) + END DO + + ! Ensure no small values (including negatives) remain in cloud variables nor + ! precipitation rates. + ! Evaporate l,i,r,s to water vapour. Latent heating taken into account below + DO JN=1,NCLV - 1 + IF (ZQXN(JN) < ZEPSEC) THEN + ZQXN(NCLDQV) = ZQXN(NCLDQV) + ZQXN(JN) + ZQXN(JN) = 0.0_JPRB + END IF + END DO + + !-------------------------------- + ! variables needed for next level + !-------------------------------- + DO JM=1,NCLV + ZQXNM1(JM) = ZQXN(JM) + ZQXN2D(JM) = ZQXN(JM) + END DO + + !------------------------------------------------------------------------ + ! 5.3 Precipitation/sedimentation fluxes to next level + ! diagnostic precipitation fluxes + ! It is this scaled flux that must be used for source to next layer + !------------------------------------------------------------------------ + + DO JM=1,NCLV + ZPFPLSX(JK_IP1, JM) = ZFALLSINK(JM)*ZQXN(JM)*ZRDTGDP + END DO + + ! Ensure precipitation fraction is zero if no precipitation + ZQPRETOT = ZPFPLSX(JK_IP1, NCLDQS) + ZPFPLSX(JK_IP1, NCLDQR) + IF (ZQPRETOT < ZEPSEC) THEN + ZCOVPTOT = 0.0_JPRB + END IF + + !###################################################################### + ! 6 *** UPDATE TENDANCIES *** + !###################################################################### + + !-------------------------------- + ! 6.1 Temperature and CLV budgets + !-------------------------------- + + DO JM=1,NCLV - 1 + + ! calculate fluxes in and out of box for conservation of TL + ZFLUXQ(JM) = ZPSUPSATSRCE(JM) + ZCONVSRCE(JM) + ZFALLSRCE(JM) - (ZFALLSINK(JM) + ZCONVSINK(JM))*ZQXN(JM) + + IF (IPHASE(JM) == 1) THEN + TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) + RALVDCP*(ZQXN(JM) - ZQX(JM) - ZFLUXQ(JM))*ZQTMST + END IF + + IF (IPHASE(JM) == 2) THEN + TENDENCY_LOC_T(JL, JK) = TENDENCY_LOC_T(JL, JK) + RALSDCP*(ZQXN(JM) - ZQX(JM) - ZFLUXQ(JM))*ZQTMST + END IF + + !---------------------------------------------------------------------- + ! New prognostic tendencies - ice,liquid rain,snow + ! Note: CLV arrays use PCLV in calculation of tendency while humidity + ! uses ZQX. This is due to clipping at start of cloudsc which + ! include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + !---------------------------------------------------------------------- + TENDENCY_LOC_CLD(JL, JK, JM) = TENDENCY_LOC_CLD(JL, JK, JM) + (ZQXN(JM) - ZQX0(JM))*ZQTMST + + END DO + + !---------------------- + ! 6.2 Humidity budget + !---------------------- + TENDENCY_LOC_q(JL, JK) = TENDENCY_LOC_Q(JL, JK) + (ZQXN(NCLDQV) - ZQX(NCLDQV))*ZQTMST + + !------------------- + ! 6.3 cloud cover + !----------------------- + TENDENCY_LOC_a(JL, JK) = TENDENCY_LOC_A(JL, JK) + ZDA*ZQTMST + + !-------------------------------------------------- + ! Copy precipitation fraction into output variable + !------------------------------------------------- + PCOVPTOT(JL, JK) = ZCOVPTOT + + END IF + + END IF + + ! on vertical level JK + !---------------------------------------------------------------------- + ! END OF VERTICAL LOOP + !---------------------------------------------------------------------- + + !###################################################################### + ! 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + !###################################################################### + + !-------------------------------------------------------------------- + ! Copy general precip arrays back into PFP arrays for GRIB archiving + ! Add rain and liquid fluxes, ice and snow fluxes + !-------------------------------------------------------------------- + PFPLSL(JL, JK) = ZPFPLSX(JK_I, NCLDQR) + ZPFPLSX(JK_I, NCLDQL) + PFPLSN(JL, JK) = ZPFPLSX(JK_I, NCLDQS) + ZPFPLSX(JK_I, NCLDQI) + + if (1<=JK .AND. JK<=KLEV) THEN + + ZGDPH_R = -ZRG_R*(PAPH(JL, JK + 1) - PAPH(JL, JK))*ZQTMST + PFSQLF(JL, JK + 1) = PFSQLF(JL, JK) + PFSQIF(JL, JK + 1) = PFSQIF(JL, JK) + PFSQRF(JL, JK + 1) = PFSQLF(JL, JK) + PFSQSF(JL, JK + 1) = PFSQIF(JL, JK) + PFCQLNG(JL, JK + 1) = PFCQLNG(JL, JK) + PFCQNNG(JL, JK + 1) = PFCQNNG(JL, JK) + PFCQRNG(JL, JK + 1) = PFCQLNG(JL, JK) + PFCQSNG(JL, JK + 1) = PFCQNNG(JL, JK) + PFSQLTUR(JL, JK + 1) = PFSQLTUR(JL, JK) + PFSQITUR(JL, JK + 1) = PFSQITUR(JL, JK) + + ZALFAW = ZFOEALFA + + ! Liquid , LS scheme minus detrainment + PFSQLF(JL, JK + 1) = & + & PFSQLF(JL, JK + 1) + (ZQXN2D(NCLDQL) - ZQX0(NCLDQL) + PVFL(JL, JK)*PTSPHY - ZALFAW*PLUDE(JL, JK))*ZGDPH_R + ! liquid, negative numbers + PFCQLNG(JL, JK + 1) = PFCQLNG(JL, JK + 1) + ZLNEG(NCLDQL)*ZGDPH_R + + ! liquid, vertical diffusion + PFSQLTUR(JL, JK + 1) = PFSQLTUR(JL, JK + 1) + PVFL(JL, JK)*PTSPHY*ZGDPH_R + + ! Rain, LS scheme + PFSQRF(JL, JK + 1) = PFSQRF(JL, JK + 1) + (ZQXN2D(NCLDQR) - ZQX0(NCLDQR))*ZGDPH_R + ! rain, negative numbers + PFCQRNG(JL, JK + 1) = PFCQRNG(JL, JK + 1) + ZLNEG(NCLDQR)*ZGDPH_R + + ! Ice , LS scheme minus detrainment + PFSQIF(JL, JK + 1) = PFSQIF(JL, JK + 1) + (ZQXN2D(NCLDQI) - ZQX0(NCLDQI) + PVFI(JL, JK)*PTSPHY - (1.0_JPRB & + & - ZALFAW)*PLUDE(JL, JK))*ZGDPH_R + ! ice, negative numbers + PFCQNNG(JL, JK + 1) = PFCQNNG(JL, JK + 1) + ZLNEG(NCLDQI)*ZGDPH_R + + ! ice, vertical diffusion + PFSQITUR(JL, JK + 1) = PFSQITUR(JL, JK + 1) + PVFI(JL, JK)*PTSPHY*ZGDPH_R + + ! snow, LS scheme + PFSQSF(JL, JK + 1) = PFSQSF(JL, JK + 1) + (ZQXN2D(NCLDQS) - ZQX0(NCLDQS))*ZGDPH_R + ! snow, negative numbers + PFCQSNG(JL, JK + 1) = PFCQSNG(JL, JK + 1) + ZLNEG(NCLDQS)*ZGDPH_R + + END IF + + !----------------------------------- + ! enthalpy flux due to precipitation + !----------------------------------- + PFHPSL(JL, JK) = -RLVTT*PFPLSL(JL, JK) + PFHPSN(JL, JK) = -RLSTT*PFPLSN(JL, JK) + END DO + + !=============================================================================== + !IF (LHOOK) CALL DR_HOOK('CLOUDSC',1,ZHOOK_HANDLE) + END SUBROUTINE CLOUDSC_SCC_K_CACHING +END MODULE CLOUDSC_GPU_SCC_K_CACHING_MOD diff --git a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 index bef2a13b..fbc5e321 100644 --- a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 +++ b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 @@ -33,6 +33,10 @@ PROGRAM DWARF_CLOUDSC USE CLOUDSC_DRIVER_GPU_SCC_HOIST_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_HOIST #endif +#ifdef CLOUDSC_GPU_SCC_K_CACHING +USE CLOUDSC_DRIVER_GPU_SCC_K_CACHING_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_K_CACHING +#endif + #ifdef CLOUDSC_GPU_OMP_SCC_HOIST USE CLOUDSC_DRIVER_GPU_OMP_SCC_HOIST_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_HOIST #endif @@ -247,6 +251,33 @@ PROGRAM DWARF_CLOUDSC & ) #endif +#if defined(CLOUDSC_GPU_SCC_K_CACHING) +print '(1X,A42)', 'Executing CLOUDSC-GPU, "SCC-k-caching" variant...' + + ! Call the driver to perform the parallel loop over our kernel +CALL CLOUDSC_DRIVER_GPU_SCC_K_CACHING(NUMOMP, NPROMA, GLOBAL_STATE%KLEV, NGPTOT, GLOBAL_STATE%NBLOCKS, NGPTOTG, & + & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, & + & GLOBAL_STATE%PT, GLOBAL_STATE%PQ, & + & GLOBAL_STATE%B_CML, GLOBAL_STATE%B_TMP, GLOBAL_STATE%B_LOC, & + & GLOBAL_STATE%PVFA, GLOBAL_STATE%PVFL, GLOBAL_STATE%PVFI, & + & GLOBAL_STATE%PDYNA, GLOBAL_STATE%PDYNL, GLOBAL_STATE%PDYNI, & + & GLOBAL_STATE%PHRSW, GLOBAL_STATE%PHRLW, & + & GLOBAL_STATE%PVERVEL, GLOBAL_STATE%PAP, GLOBAL_STATE%PAPH, & + & GLOBAL_STATE%PLSM, GLOBAL_STATE%LDCUM, GLOBAL_STATE%KTYPE, & + & GLOBAL_STATE%PLU, GLOBAL_STATE%PLUDE, GLOBAL_STATE%PSNDE, & + & GLOBAL_STATE%PMFU, GLOBAL_STATE%PMFD, & + & GLOBAL_STATE%PA, & + & GLOBAL_STATE%PCLV, GLOBAL_STATE%PSUPSAT,& + & GLOBAL_STATE%PLCRIT_AER, GLOBAL_STATE%PICRIT_AER, GLOBAL_STATE%PRE_ICE, & + & GLOBAL_STATE%PCCN, GLOBAL_STATE%PNICE,& + & GLOBAL_STATE%PCOVPTOT, GLOBAL_STATE%PRAINFRAC_TOPRFZ, & + & GLOBAL_STATE%PFSQLF, GLOBAL_STATE%PFSQIF , GLOBAL_STATE%PFCQNNG, GLOBAL_STATE%PFCQLNG, & + & GLOBAL_STATE%PFSQRF, GLOBAL_STATE%PFSQSF , GLOBAL_STATE%PFCQRNG, GLOBAL_STATE%PFCQSNG, & + & GLOBAL_STATE%PFSQLTUR, GLOBAL_STATE%PFSQITUR, & + & GLOBAL_STATE%PFPLSL, GLOBAL_STATE%PFPLSN, GLOBAL_STATE%PFHPSL, GLOBAL_STATE%PFHPSN & + & ) +#endif + #ifdef CLOUDSC_GPU_SCC_FIELD print *, 'Executing CLOUDSC-GPU, "SCC" variant with FIELD API, PACKED STORAGE', USE_PACKED From 21bdd3ab4db9511c4f404e8c27226077338e3a91 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 17 Mar 2023 09:49:57 +0000 Subject: [PATCH 012/174] updated GitHub actions, targets to be verified and run --- .github/scripts/run-targets.sh | 3 ++- .github/scripts/verify-targets.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh index 41aba018..77b16da4 100755 --- a/.github/scripts/run-targets.sh +++ b/.github/scripts/run-targets.sh @@ -11,7 +11,8 @@ skipped_targets=(dwarf-cloudsc-gpu-claw) if [[ "$arch" == *"nvhpc"* ]] then # Skip GPU targets if built with nvhpc (don't have GPU in test runner) - skipped_targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-omp-scc-hoist dwarf-cloudsc-gpu-scc-field) + skipped_targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) + skipped_targets+=(dwarf-cloudsc-gpu-omp-scc-hoist dwarf-cloudsc-gpu-scc-field) # Skip GPU targets from Loki if built with nvhpc (don't have GPU in test runner) skipped_targets+=(dwarf-cloudsc-loki-claw-gpu dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index c1021f6e..9571b55d 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -17,7 +17,8 @@ fi if [[ "$gpu_flag" == "--with-gpu" ]] then - targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-omp-scc-hoist) + targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) + targets+=(dwarf-cloudsc-gpu-omp-scc-hoist) if [[ "$claw_flag" == "--with-claw" ]] then targets+=(dwarf-cloudsc-gpu-claw) From 5eefb42c77f500a237d816279cf5a9109de29e9b Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 21 Mar 2023 14:09:12 +0000 Subject: [PATCH 013/174] CMake improvements to address PR review --- CMakeLists.txt | 2 +- bundle.yml | 6 ------ src/cloudsc_gpu/CMakeLists.txt | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 18489907..5b159d88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,7 +96,7 @@ ecbuild_add_option( FEATURE FIELD_API DESCRIPTION "Use field_api to manage GPU data offload and copyback" REQUIRED_PACKAGES "field_api" CONDITION HAVE_CUDA - DEFAULT OFF ) + DEFAULT ON ) ecbuild_find_package( NAME loki ) diff --git a/bundle.yml b/bundle.yml index 8da185e3..f6a0df8d 100644 --- a/bundle.yml +++ b/bundle.yml @@ -76,8 +76,6 @@ options : ENABLE_CUDA=ON ENABLE_CLOUDSC_GPU_SCC_CUF=ON ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON - ENABLE_CLOUDSC_GPU_SCC_FIELD=ON - ENABLE_FIELD_API=ON BUILD_field_api=ON - with-mpi : @@ -107,10 +105,6 @@ options : help : Frontend parser to use for Loki transformations cmake : LOKI_FRONTEND={{value}} - - field_api-testing : - help : Enable field_api testing - cmake : ENABLE_FIELD_API_TESTS=ON - - cloudsc-prototype1 : help : Build the original operational Fortran prototype [ON|OFF] cmake : ENABLE_CLOUDSC_PROTOTYPE1={{value}} diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt index 25703d1b..8fcfed82 100644 --- a/src/cloudsc_gpu/CMakeLists.txt +++ b/src/cloudsc_gpu/CMakeLists.txt @@ -44,7 +44,7 @@ ecbuild_add_option( FEATURE CLOUDSC_GPU_OMP_SCC_HOIST ) ecbuild_add_option( FEATURE CLOUDSC_GPU_SCC_FIELD - DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with FIELD API" DEFAULT OFF + DESCRIPTION "Build optimized GPU version of CLOUDSC using SCC with FIELD API" DEFAULT ON CONDITION HAVE_FIELD_API AND ( Serialbox_FOUND OR HDF5_FOUND ) ) From 9432125d5229f89b298bfd6332224f0758a3d24b Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 21 Mar 2023 14:13:15 +0000 Subject: [PATCH 014/174] Updated field_api variant README with CUDA heapsize info --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 49b5c09e..5e745379 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,9 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) data transfers, as provided by the shipped prototype implmentation, and investigate the effect of different data storage allocation layouts. To enable this variant, a suitable CUDA installation is required and the - `--with-cuda` flag needs to be passed at the build stage. + `--with-cuda` flag needs to be passed at the build stage. This variant lets the CUDA runtime + manage temporary arrays and needs a large `NV_ACC_CUDA_HEAPSIZE` + (eg. `NV_ACC_CUDA_HEAPSIZE=8GB` for 160K columns.) ## Download and Installation From 476a2af7fc10ff6f3473eca9ede060ef7d07648b Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 21 Mar 2023 14:13:43 +0000 Subject: [PATCH 015/174] Added CUDA heapsize env var to field_api variant test --- src/cloudsc_gpu/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt index 8fcfed82..785987c2 100644 --- a/src/cloudsc_gpu/CMakeLists.txt +++ b/src/cloudsc_gpu/CMakeLists.txt @@ -229,6 +229,7 @@ if( HAVE_CLOUDSC_GPU_SCC_FIELD ) ARGS 1 1000 128 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 + ENVIRONMENT "NVCOMPILER_ACC_CUDA_HEAPSIZE=64M" ) # Importantly, we add the "pinned" flag to the linker command to ensure pinning! From 7844498e3c0159ec6d4e2596748fb532ff581ee4 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 21 Mar 2023 15:00:09 +0000 Subject: [PATCH 016/174] Removed field_api variant from github actions verify-targets --- .github/scripts/verify-targets.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index c1021f6e..f4a81118 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -25,7 +25,6 @@ then if [[ "$cuda_flag" == "--with-cuda" ]] then targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching) - targets+=(dwarf-cloudsc-gpu-scc-field) fi if [[ "$cuda_flag" == "--with-cuda" && "$io_library_flag" == "--with-serialbox" ]] then From 9da2060d6377fee0d2d07f74f9c6ff152496e570 Mon Sep 17 00:00:00 2001 From: Zbigniew Piotrowski Date: Mon, 17 Apr 2023 16:02:24 +0200 Subject: [PATCH 017/174] PyIface - Fortran-Python bridging with CMake compilation (#38) * Initial copy of the python driver files and settings to the dedicated branch * Python calling of Fortran works in the develop branch * add missing fortran part * bundle build works for python driver * Cleanup of the execution process * Small cleanup. Verifies with activation of venv and setting LD_LIBRARY_PATH * rpath passed to f2py to find .so libs automatically. Works on Mac, to be checked on Atos * python packaging configs * abstracting field_c_to_fortran * abstracting field_fortran_to_c * rearrangement of pyiface sources, validates on ATOS, python packaging still not working * Mods to be able to perform pip install. Works with Python 3.11 * cmake improvements for the python driver to find its dependencies smoothly. Temporarily Python is a required package to build cloudsc * pyproject.toml added, now builds correctly on ATOS * First round of cmake cleanup * Second batch of cmake cleanup. Added ctest entries * Third batch of CMakelist improvements * Add master compiler options to f2py invocation; diagnostics and comment removal * Revert to explicit cloudsc loader in Python, not needed on Mac but needed on Atos. Add importlib to requirements for completeness * Basic README added * Fix to reenable CI in Github * Remove locals from driver, add clic option * encapsulation of parameters * Expands almost work, except for 4D vars * Calling expand works, but nlev has to be hardcoded in expand_r3, possibly to bug in f2py? * Setting nproma works * Setting ngptot works * Cleanup using pylint, a few pylint complains remain, but wont be adressed * Trying to make Python optional * Try once again * Try once again * Try once again * Try once again * Try once again * Turn off the build of Python interface for CI * RC1 - works successfuly with nproma=32 on ATOS using GNU and Intel. Now to the last check on Mac * RC2, corrected wrong character in Readme, downgrade of compulsory Python version in cmake to 3.8 to reenable CI compilation. Pyiface version itself is not tested in CI * PyIface: Reverting to 3.8 default Python version * PyIface: Use cmake macros to setup venv and build/install there This way we do not rely on the user to setup an bespoke venv and eventually shoot themselves in the foot. * PyIface: Clean up CMake layer by separating post-processing steps We now have three clearly defined steps to generate Python wrappers. * PyIface: Clean up cloudsc_data utility modules Remove redundant use of itemgetter, switch to keyword-args call signatures, etc. * PyIface: Do dynamic module load via utility routine and CLI override This now allows us to point to the wrapped-and-generated CLOUDSC Fortran module, so that we can execute the driver from anywhere. * PyIface: Fix pip installation of python utility package * PyIface: Dropping second pip install and install into build/bin This drops a lot of redundant source file copying in the final stage, where we now only copy the driver script over and rely on correct path discover via the CLI options. This now also allows, in theory, to use editable install of the Python package. * PyIface: Fix erroneous usage of expand_r3 and remove expand_r3bis * PyIface: Rename Python package to pyiface * PyIface: Fix ctest and install as editable; fix typo in error message * PyIface: Add CLI override for input file path location * PyIface: More fixes to path handling in driver * PyIface: Add second author to pyproject.toml * Install driver script in venv_pyiface * Add pyiface to Github actions * Github: export F77, F90 env vars * Custom run signature and dedicated build matrix entry for pyiface * Turn off pyiface by default * Add pyiface flag in build name * altered tendency names in cloudsc to match Loki transformations * Added cloudsc pyiface binary option to build.yml. Update of the READMEs * Formatting of READMEs are updated * README fix * Readme fixes, numbers of microphysics variables sources from the include file * Update performance after including constant microphysics variable numbering into cloudsc; add missing space * PyIface: Use constant cloud variable indices from common module * PyIface: Final tweaks to READMEs and pyproject.toml --------- Co-authored-by: Michael Lange Co-authored-by: Balthasar Reuter --- .github/scripts/.verify-targets.sh.swp | Bin 12288 -> 0 bytes .github/scripts/run-targets.sh | 4 + .github/scripts/verify-targets.sh | 5 + .github/workflows/build.yml | 18 +- README.md | 21 + arch/github/ubuntu/gnu/9.4.0/env.sh | 2 + bundle.yml | 8 + cmake/python_venv.cmake | 179 + src/CMakeLists.txt | 1 + src/cloudsc_pyiface/CMakeLists.txt | 157 + src/cloudsc_pyiface/LICENSE | 190 ++ src/cloudsc_pyiface/README.md | 62 + src/cloudsc_pyiface/drivers/__init__.py | 0 .../drivers/cloudsc_pyiface.py | 141 + src/cloudsc_pyiface/fortransrc/cloudsc.F90 | 2902 +++++++++++++++++ .../fortransrc/cloudsc_driver_mod.F90 | 209 ++ .../fortransrc/dwarf_cloudsc.F90 | 107 + src/cloudsc_pyiface/kind_map | 18 + src/cloudsc_pyiface/pyproject.toml | 43 + src/cloudsc_pyiface/src/pyiface/__init__.py | 12 + .../src/pyiface/cloudsc_data.py | 442 +++ src/cloudsc_pyiface/src/pyiface/dynload.py | 42 + src/common/module/expand_mod.F90 | 2 - 23 files changed, 4561 insertions(+), 4 deletions(-) delete mode 100644 .github/scripts/.verify-targets.sh.swp create mode 100644 cmake/python_venv.cmake create mode 100644 src/cloudsc_pyiface/CMakeLists.txt create mode 100644 src/cloudsc_pyiface/LICENSE create mode 100644 src/cloudsc_pyiface/README.md create mode 100644 src/cloudsc_pyiface/drivers/__init__.py create mode 100755 src/cloudsc_pyiface/drivers/cloudsc_pyiface.py create mode 100644 src/cloudsc_pyiface/fortransrc/cloudsc.F90 create mode 100644 src/cloudsc_pyiface/fortransrc/cloudsc_driver_mod.F90 create mode 100644 src/cloudsc_pyiface/fortransrc/dwarf_cloudsc.F90 create mode 100755 src/cloudsc_pyiface/kind_map create mode 100644 src/cloudsc_pyiface/pyproject.toml create mode 100644 src/cloudsc_pyiface/src/pyiface/__init__.py create mode 100644 src/cloudsc_pyiface/src/pyiface/cloudsc_data.py create mode 100644 src/cloudsc_pyiface/src/pyiface/dynload.py diff --git a/.github/scripts/.verify-targets.sh.swp b/.github/scripts/.verify-targets.sh.swp deleted file mode 100644 index 3eb947a5667efce221f74bd8cda692e4210c91cf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2O^72!7=~;2n+MOL2M-?fq@akfC%p;cTiu=6Y|@(`o|Nk0Ni)@5Uwz+G zT^)w0UVHb_4t=!QCOB3Jd1v)6{y_K~S-bWiiP^~5=4A)t7yR?>3(Z~@MdJlGU$X&Q z&j>S2Rnp>-$^ygpc^0r%aKLovCw|%3YW8J1$a*cqwM|5z$T3Pg zU6%KJc|J9BE`N0=XTTY72AlzBz!`7`oB?OR8E^)i0cXG&_zxOj0U;0FLCEkknyO)rk!5g3pc0n6#g45v7 zGlYB#u7mf%d*EH5z!lI0H|`A-{m{z{lVt@FBPkJ^&doK!eA@V_*~f4o_c#V{jE50qgDxaECMC3^)VM zfHU9>I0G{V#8{@ZZa??A3IrjcB$Nt1P}J*mgw{%TIu}(I2ejPh*rD}vk)i$;?PW3w zT0I%lt2EVgqlxReZ#rWv(O(XbfpV!NSJ0Lk!oo~12x}Lx>vXRZTG{7^*y@1O+DW~z*buX z4wz>RoVZ??3y;`=&y%bwjr>|a$*RUiw~DdR`cKBL2%Koa%fRb-QTDlrgm2rW!Lwb) zUsx0bSJh*^=g#wSiyg?-Gb95ss*b=K(+kF{+Xc*Lym>TE ziN9Skd}bD<+K*&UGkrK8(O}K9sHet7+%n{3VZTu9bl;I&t7LF!Bsc!0inFjqGbcg_Fs)Z_AtSJZ$ncBBlPgIEhTrL?$9+G9tBg`%Tj7 IF>Is%8#vi|1poj5 diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh index 77b16da4..1c4c6128 100755 --- a/.github/scripts/run-targets.sh +++ b/.github/scripts/run-targets.sh @@ -21,6 +21,7 @@ then skipped_targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching) skipped_targets+=(dwarf-cloudsc-loki-scc-cuf-hoist dwarf-cloudsc-loki-scc-cuf-parametrise) skipped_targets+=(dwarf-cloudsc-cuda dwarf-cloudsc-cuda-hoist dwarf-cloudsc-cuda-k-caching) + # Skip C target if built with nvhpc, segfaults for unknown reasons skipped_targets+=(dwarf-cloudsc-c dwarf-cloudsc-loki-c) fi @@ -45,6 +46,9 @@ do # Two ranks with one thread each, safe NPROMA # NB: Use oversubscribe to run, even if we end up on a single core agent mpirun --oversubscribe -np 2 bin/$target 1 100 64 + elif [[ "$target" == "cloudsc_pyiface.py" ]] + then + bin/$target --numomp 1 --ngptot 100 --nproma 64 else # Single thread, safe NPROMA bin/$target 1 100 64 diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index b36d548b..9528f65a 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -51,6 +51,11 @@ then fi fi +if [[ "$pyiface_flag" == "--cloudsc-fortran-pyiface=ON" ]] +then + targets+=(cloudsc_pyiface.py) +fi + # # Verify each target exists # diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f2724644..6c5c310e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ on: jobs: # This workflow contains a single job called "build" build: - name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} + name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} ${{ matrix.pyiface_flag }} # The type of runner that the job will run on runs-on: ubuntu-20.04 @@ -44,6 +44,8 @@ jobs: claw_flag: [''] # Flag to enable CLAW-generated variants + pyiface_flag: [''] # Flag to enable Python-interface variant + include: # Add nvhpc build configurations with serialbox and HDF5 - arch: github/ubuntu/nvhpc/21.9 @@ -53,6 +55,7 @@ jobs: gpu_flag: '--with-gpu' cuda_flag: '--with-cuda' loki_flag: '--with-loki' + pyiface_flag: '' - arch: github/ubuntu/nvhpc/21.9 io_library_flag: '--with-serialbox' mpi_flag: '' @@ -60,6 +63,16 @@ jobs: gpu_flag: '--with-gpu' cuda_flag: '--with-cuda' loki_flag: '--with-loki' + pyiface_flag: '' + # Add pyiface build configuration for HDF5 only + - arch: github/ubuntu/gnu/9.4.0 + io_library_flag: '' + mpi_flag: '' + prec_flag: '' + gpu_flag: '' + cuda_flag: '' + loki_flag: '' + pyiface_flag: '--cloudsc-fortran-pyiface=ON' # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -105,7 +118,7 @@ jobs: ./cloudsc-bundle build --retry-verbose \ --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \ ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \ - ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} + ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} ${{ matrix.pyiface_flag }} # Verify targets exist - name: Verify targets @@ -116,6 +129,7 @@ jobs: cuda_flag: ${{ matrix.cuda_flag }} loki_flag: ${{ matrix.loki_flag }} claw_flag: ${{ matrix.claw_flag }} + pyiface_flag: ${{ matrix.pyiface_flag }} run: .github/scripts/verify-targets.sh # Run double-precision targets diff --git a/README.md b/README.md index b2067341..a217b2dd 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,15 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) `--with-cuda` flag needs to be passed at the build stage. This variant lets the CUDA runtime manage temporary arrays and needs a large `NV_ACC_CUDA_HEAPSIZE` (eg. `NV_ACC_CUDA_HEAPSIZE=8GB` for 160K columns.) +- **cloudsc-pyiface.py**: a combination of the cloudsc/cloudsc-driver routines + of cloudsc-fortran with the uppermost `dwarf` program replaced with a + corresponding Python script capable of HDF5 data load and + verification of computation results. The computation is realized by the + Fortran subprogram, mimicking cloudsc-fortran and equipped with only + minor modifications (i.e. derived types/global paramters handling). + Turned off by default, activate at the build stage with + `--cloudsc-fortran-pyiface=ON`. + ## Download and Installation @@ -339,6 +348,18 @@ The following Loki modes are included in the dwarf, each with a bespoke demonstr To enable the deprecated and, on GPU, defunct CLAW variants, the build-flag `--with-claw` needs to be specified explicitly. +## Python-driven CLOUDSC variants +The following partly or fully Python-based CLOUDSC are available: +- **cloudsc-python**: GT4PY based Python-only implementation. Refer to `src/cloudsc_python` + for information on how to bootstrap/execute this variant +- **cloudsc-pyiface**: Fortran-based CLOUDSC variant driven by the Python script. + Activate with: +```sh +./cloudsc-bundle build --clean --cloudsc-fortran-pyiface=ON +``` +These variants are disabled by default. Refer to README.md in corresponding subdirectories +for further information. + ### A note on frontends Loki currently supports three frontends to parse the Fortran source code: diff --git a/arch/github/ubuntu/gnu/9.4.0/env.sh b/arch/github/ubuntu/gnu/9.4.0/env.sh index 198e0466..c8eb1fcb 100644 --- a/arch/github/ubuntu/gnu/9.4.0/env.sh +++ b/arch/github/ubuntu/gnu/9.4.0/env.sh @@ -3,5 +3,7 @@ export CC=gcc-9 export CXX=g++-9 export FC=gfortran-9 +export F77=gfortran-9 +export F90=gfortran-9 export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/bundle.yml b/bundle.yml index 86564e11..7669d06b 100644 --- a/bundle.yml +++ b/bundle.yml @@ -114,6 +114,14 @@ options : help : Build the new Fortran version of CLOUDSC [ON|OFF] cmake : ENABLE_CLOUDSC_FORTRAN={{value}} + - cloudsc-fortran-pyiface : + help : Build the Python driver interfaced with the new Fortran version of CLOUDSC [ON|OFF] + cmake : ENABLE_CLOUDSC_FORTRAN_PYIFACE={{value}} + + - cloudsc-fortran-pyiface-binary : + help : Build the Fortran binary for the PYIFACE version [ON|OFF] + cmake : ENABLE_CLOUDSC_FORTRAN_PYIFACE_BINARY={{value}} + - cloudsc-c : help : Build the C version of CLOUDSC [ON|OFF] cmake : ENABLE_CLOUDSC_C={{value}} diff --git a/cmake/python_venv.cmake b/cmake/python_venv.cmake new file mode 100644 index 00000000..95d143e1 --- /dev/null +++ b/cmake/python_venv.cmake @@ -0,0 +1,179 @@ +# (C) Copyright 2018- ECMWF. +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +############################################################################## +#.rst: +# +# find_python_venv +# ================ +# +# Find Python 3 inside a virtual environment. :: +# +# find_python_venv(VENV_PATH) +# +# It finds the Python3 Interpreter from a virtual environment at +# the given location (`VENV_PATH`) +# +# Options +# ------- +# +# :VENV_PATH: The path to the virtual environment +# +# Output variables +# ---------------- +# :Python3_FOUND: Exported into parent scope from FindPython3 +# :Python3_EXECUTABLE: Exported into parent scope from FindPython3 +# :Python3_VENV_BIN: The path to the virtual environment's `bin` directory +# :ENV{VIRTUAL_ENV}: Environment variable with the virtual environment directory, +# emulating the activate script +# +############################################################################## + +function( find_python_venv VENV_PATH ) + + # Update the environment with VIRTUAL_ENV variable (mimic the activate script) + set( ENV{VIRTUAL_ENV} ${VENV_PATH} ) + + # Change the context of the search to only find the venv + set( Python3_FIND_VIRTUALENV ONLY ) + + # Unset Python3_EXECUTABLE because it is also an input variable + # (see documentation, Artifacts Specification section) + unset( Python3_EXECUTABLE ) + + # Launch a new search + find_package( Python3 COMPONENTS Interpreter Development REQUIRED ) + + # Find the binary directory of the virtual environment + execute_process( + COMMAND ${Python3_EXECUTABLE} -c "import sys; import os.path; print(os.path.dirname(sys.executable), end='')" + OUTPUT_VARIABLE Python3_VENV_BIN + ) + + # Forward variables to parent scope + foreach ( _VAR_NAME Python3_FOUND Python3_EXECUTABLE Python3_VENV_BIN ) + set( ${_VAR_NAME} ${${_VAR_NAME}} PARENT_SCOPE ) + endforeach() + +endfunction() + +############################################################################## +#.rst: +# +# create_python_venv +# ================== +# +# Find Python 3 and create a virtual environment. :: +# +# create_python_venv(VENV_PATH) +# +# Installation procedure +# ---------------------- +# +# It creates a virtual environment at the given location (`VENV_PATH`) +# +# Options +# ------- +# +# :VENV_PATH: The path to use for the virtual environment +# +############################################################################## + +function( create_python_venv VENV_PATH ) + + # Discover only system install Python 3 + set( Python3_FIND_VIRTUALENV STANDARD ) + find_package( Python3 COMPONENTS Interpreter REQUIRED ) + + # Create a loki virtualenv + message( STATUS "Create Python virtual environment ${VENV_PATH}" ) + execute_process( COMMAND ${Python3_EXECUTABLE} -m venv --copies "${VENV_PATH}" ) + + # Make the virtualenv portable by automatically deducing the VIRTUAL_ENV path from + # the 'activate' script's location in the filesystem + execute_process( + COMMAND + sed -i "s/^VIRTUAL_ENV=\".*\"$/VIRTUAL_ENV=\"$(cd \"$(dirname \"$(dirname \"\${BASH_SOURCE[0]}\" )\")\" \\&\\& pwd)\"/" "${VENV_PATH}/bin/activate" + ) + +endfunction() + +############################################################################## +#.rst: +# +# setup_python_venv +# ================= +# +# Find Python 3, create a virtual environment and make it available. :: +# +# setup_python_venv(VENV_PATH) +# +# It combines calls to `create_python_venv` and `find_python_venv` +# +# Options +# ------- +# +# :VENV_PATH: The path to use for the virtual environment +# +# Output variables +# ---------------- +# :Python3_FOUND: Exported into parent scope from FindPython3 +# :Python3_EXECUTABLE: Exported into parent scope from FindPython3 +# :Python3_VENV_BIN: The path to the virtual environment's `bin` directory +# :ENV{VIRTUAL_ENV}: Environment variable with the virtual environment directory, +# emulating the activate script +# +############################################################################## + +macro( setup_python_venv VENV_PATH ) + + # Create the virtual environment + create_python_venv( ${VENV_PATH} ) + + # Discover Python in the virtual environment and set-up variables + find_python_venv( ${VENV_PATH} ) + +endmacro() + +############################################################################## +#.rst: +# +# update_python_shebang +# ===================== +# +# Update the shebang in the given executable scripts to link them to a +# Python executable that is located in the same directory. :: +# +# update_python_shebang( executable1 [executable2] [...] ) +# +############################################################################## + +function( update_python_shebang ) + + foreach( _exe IN LISTS ARGV ) + + # Replace the shebang in the executable script by the following to use the + # Python binary that resides in the same directory as the script + # (see https://stackoverflow.com/a/57567228). + # That allows to move the script elsewhere along with the rest of the virtual + # environment without breaking the link to the venv-interpreter + # + # #!/bin/sh + # "true" '''\' + # exec "$(dirname "$(readlink -f "$0")")"/python "$0" "$@" + # ''' + + ecbuild_debug( "Update shebang for ${_exe}" ) + + execute_process( + COMMAND + sed -i "1s/^.*$/#\\!\\/bin\\/sh\\n\\\"true\\\" '''\\\\'\\nexec \\\"$(dirname \\\"$(readlink -f \\\"\\$0\\\")\\\")\\\"\\/python \\\"\\$0\\\" \\\"\\$@\\\"\\n'''/" ${_exe} + ) + + endforeach() + +endfunction() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b0a1c105..b1513fb3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ add_subdirectory(prototype1) add_subdirectory(common) add_subdirectory(cloudsc_fortran) +add_subdirectory(cloudsc_pyiface) add_subdirectory(cloudsc_c) add_subdirectory(cloudsc_cuda) add_subdirectory(cloudsc_gpu) diff --git a/src/cloudsc_pyiface/CMakeLists.txt b/src/cloudsc_pyiface/CMakeLists.txt new file mode 100644 index 00000000..38f69cc7 --- /dev/null +++ b/src/cloudsc_pyiface/CMakeLists.txt @@ -0,0 +1,157 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Define this dwarf variant as an ECBuild feature +ecbuild_add_option( FEATURE CLOUDSC_FORTRAN_PYIFACE + DESCRIPTION "Build the Python interface to the (slightly customized) cloudsc-fortran" DEFAULT OFF + CONDITION HDF5_FOUND +) +ecbuild_add_option( FEATURE CLOUDSC_FORTRAN_PYIFACE_BINARY + DESCRIPTION "Build the debug-oriented binary for Python interface of (customized) cloudsc-fortran" DEFAULT OFF + CONDITION HAVE_CLOUDSC_FORTRAN_PYIFACE +) + +if( HAVE_CLOUDSC_FORTRAN_PYIFACE ) + + # Utilities to manage Python virtual environments + include( python_venv ) + + # Set up a custom venv for this variant and install the necessary dependencies + set( pyiface_VENV_PATH ${CMAKE_BINARY_DIR}/venv_pyiface ) + setup_python_venv( ${pyiface_VENV_PATH} ) + + if( NOT Python3_EXECUTABLE ) + ecbuild_error("[PyIface] Could not find Python3 executable in virtualenv") + endif() + + # Install the f90wrap build dependency and via latest pip + execute_process(COMMAND ${Python3_EXECUTABLE} -m pip install --upgrade pip) + execute_process(COMMAND ${Python3_EXECUTABLE} -m pip install -e ${CMAKE_CURRENT_SOURCE_DIR}) + + # Define module directory to facilitate f90wrap/f2py execution + ecbuild_enable_fortran(MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../module) + message (STATUS "Module directory set to:") + message (STATUS ${CMAKE_Fortran_MODULE_DIRECTORY}) + + + # Set names and lists to abstract the f90wrap/f2py process + set( DWARF_CLOUDSC_LIB dwarf-cloudsc-lib) + set( DWARF_COMMON_LIB cloudsc-common-lib) + set( PYTHON_MODN cloudsc) + set( FORTRAN_PYTHON_COMMON_SOURCES + yomphyder yoecldp yoethf yomcst yoephli expand_mod + ) + set( FORTRAN_PYTHON_DWARF_SOURCES cloudsc_driver_mod ) + set( COMMON_MOD_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/../common/module/ ) + set( COMMON_MOD_BIN_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/../common/module/ ) + set( FORTRAN_SRC_LOCATION ${CMAKE_CURRENT_SOURCE_DIR}/fortransrc/ ) + + # Manipulate lists to prepare abstracted command-line input/output for f90wrap/f2py + set( F90WRAP_COMMON_SOURCES ${FORTRAN_PYTHON_COMMON_SOURCES}) + set( F90WRAP_DWARF_SOURCES ${FORTRAN_PYTHON_DWARF_SOURCES}) + set( F2PY_COMMON_SOURCES ${FORTRAN_PYTHON_COMMON_SOURCES}) + set( F2PY_DWARF_SOURCES ${FORTRAN_PYTHON_DWARF_SOURCES}) + list(TRANSFORM F90WRAP_COMMON_SOURCES PREPEND ${COMMON_MOD_LOCATION}) + list(TRANSFORM F90WRAP_COMMON_SOURCES APPEND .F90 ) + list(TRANSFORM F90WRAP_DWARF_SOURCES PREPEND ${FORTRAN_SRC_LOCATION}) + list(TRANSFORM F90WRAP_DWARF_SOURCES APPEND .F90 ) + list(TRANSFORM F2PY_COMMON_SOURCES PREPEND f90wrap_) + list(TRANSFORM F2PY_COMMON_SOURCES APPEND .f90 ) + list(TRANSFORM F2PY_DWARF_SOURCES PREPEND f90wrap_) + list(TRANSFORM F2PY_DWARF_SOURCES APPEND .f90 ) + + # Build CLOUDSC driver/kernel library, to be further linked by f2py + ecbuild_add_library( TARGET ${DWARF_CLOUDSC_LIB} + SOURCES + ./fortransrc/cloudsc_driver_mod.F90 + ./fortransrc/cloudsc.F90 + PUBLIC_LIBS + ${DWARF_COMMON_LIB} + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + # Two-step F90wrap process: First, generate Fortran wrappers + add_custom_command( TARGET ${DWARF_CLOUDSC_LIB} POST_BUILD + COMMAND ${Python3_VENV_BIN}/f90wrap -m${PYTHON_MODN} + ${F90WRAP_COMMON_SOURCES} ${F90WRAP_DWARF_SOURCES} + -k ${CMAKE_CURRENT_SOURCE_DIR}/kind_map + > f90wrap_log.txt 2> f90wrap_log_err.txt + COMMENT "[PyIface] Executing f90wrap to generate Fortran wrappers" + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + VERBATIM + ) + + # Two-step F90wrap process: Then compile and generate Python wrappers via F2Py + # Note that we execute this in CMAKE_BINARY_DIR to make the resulting + # _cloudsc.arch.so library directly available for dynamic loading, as f2py-f90wrap + # does not allow specifying a particular output directory or path. + add_custom_command(TARGET ${DWARF_CLOUDSC_LIB} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E env --unset=LD_LIBRARY_FLAGS + LDFLAGS=-Wl,-rpath,${CMAKE_BINARY_DIR}/lib + NPY_DISTUTILS_APPEND_FLAGS=1 + ${Python3_VENV_BIN}/f2py-f90wrap -c + --f90exec=${CMAKE_Fortran_COMPILER} + --f90flags=${CMAKE_Fortran_FLAGS} + --f90flags=${CMAKE_Fortran_FLAGS_${CMAKE_BUILD_TYPE_CAPS}} + -m _${PYTHON_MODN} + -I${CMAKE_Fortran_MODULE_DIRECTORY} + -I${COMMON_MOD_BIN_LOCATION} + -L${CMAKE_BINARY_DIR}/lib + -l${DWARF_COMMON_LIB} + -l${DWARF_CLOUDSC_LIB} + ${F2PY_COMMON_SOURCES} ${F2PY_DWARF_SOURCES} + > f2py_log.txt 2> f2py_log_err.txt + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "[PyIface] Executing f2py-f90wrap to compile and generate Python wrappers" + VERBATIM + ) + + # Copy the CLI driver script into the bin directory for execution + add_custom_command(TARGET ${DWARF_CLOUDSC_LIB} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E create_symlink ${Python3_VENV_BIN}/cloudsc_pyiface.py ${CMAKE_BINARY_DIR}/bin/cloudsc_pyiface.py + COMMENT "[PyIface] Installing Python package and driver via 'pip install'" + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-pyiface + COMMAND bin/cloudsc_pyiface.py + ARGS --numomp=1 --ngptot=100 --nproma=16 --cloudsc-path=${CMAKE_BINARY_DIR} --input-path=${CMAKE_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + OMP 1 + ) + + + if( HAVE_CLOUDSC_FORTRAN_PYIFACE_BINARY ) + # Define the (optional) binary build target for this variant + ecbuild_add_executable( TARGET dwarf-cloudsc-fortran-pyiref + SOURCES + ./fortransrc/dwarf_cloudsc.F90 + LIBS + ${DWARF_COMMON_LIB} + ${DWARF_CLOUDSC_LIB} + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-fortran-pyiref + COMMAND bin/dwarf-cloudsc-fortran-pyiref + ARGS 1 100 16 + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + OMP 1 + ) + endif() + + if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + endif() + +endif() diff --git a/src/cloudsc_pyiface/LICENSE b/src/cloudsc_pyiface/LICENSE new file mode 100644 index 00000000..b52c47b2 --- /dev/null +++ b/src/cloudsc_pyiface/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 1988- ECMWF + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/src/cloudsc_pyiface/README.md b/src/cloudsc_pyiface/README.md new file mode 100644 index 00000000..cde4bdb3 --- /dev/null +++ b/src/cloudsc_pyiface/README.md @@ -0,0 +1,62 @@ +This is a driver allowing to execute IFS physics from within a Python script, currently adapted to CLOUDSC. + +Steps to run and perform basic test on ATOS: +# Build as usual; the PyIface setup will create a custom venv in the build directory +``` +./cloudsc-bundle create +./cloudsc-bundle build --build-type=release --cloudsc-fortran-pyiface=ON --arch=./arch/ecmwf/hpc2020/intel/2021.4.0/ +``` +# Work in an interactive session on the computing node: +``` +cd build && . env.sh +export OMP_NUM_THREADS=64 +OMP_PLACES=cores srun -q np --ntasks=1 --hint=nomultithread --cpus-per-task=$OMP_NUM_THREADS --pty /bin/bash +``` +To test performance, execute: +``` +cd build && . env.sh +export OMP_NUM_THREADS=64 +./bin/cloudsc_pyiface.py --numomp=$OMP_NUM_THREADS --ngptot=163840 --nproma=32 +``` +#or, alternatively, submit the non-interactive test job using: +``` +OMP_PLACES=cores srun -q np --ntasks=1 --hint=nomultithread --cpus-per-task=$OMP_NUM_THREADS ./bin/cloudsc_pyiface.py --numomp 64 --ngptot 163840 --nproma 32 +``` + +# Additional options +An additional CLI option ``--cloudsc-path=`` +can be used if the build location used to run f90wrap has changed. + +In addition, to test the Fortran part of the pyiface code independently of the Python driver, +`` --cloudsc-fortran-pyiface-binary=ON`` option can be used to build Fortran-only binary, mimicking +regular cloudsc-fortran structure. This in particular allows to test if the slight modifications +to Fortran code alter the computational performance. + +# Current performance +Currently, the performance on a single socket with AMD Rome 7742 is about about 64400 Mflops/s, +which is inferior to the reference result of: +`dwarf-cloudsc-fortran-pyiref` (about 100500), and +`dwarf-cloudsc-fortran` (about 104000) + +Similar results can be achieved using GNU compilers on ATOS using `--arch=./arch/ecmwf/hpc2020/gnu/11.2.0/` + +# Known issues + +### Performance limitations +The performance of PyIface wrapper is inferior as compared to the +`dwarf-cloudsc-fortran` reference. This is probably due to the fact that in +the process of building Fortran binaries, f2py adds low optimization +flags behind the scenes (flags vary between compilers). To +circumevent the problem, a separate explicit compilation step of +f90wrap output files is probably deserved. + +### Nvidia compilation +For the same reason, extra effort is needed to enable compile/run on +ATOS with nvhpc. Currently, invalid flags are being passed at the +f2py c compilation step, i.e.: +``` +nvc-Error-Unknown switch: -Wno-unused-result +nvc-Error-Unknown switch: -fwrapv +nvc-Error-Unknown switch: -Wno-unused-result +nvc-Error-Unknown switch: -fwrapv +``` diff --git a/src/cloudsc_pyiface/drivers/__init__.py b/src/cloudsc_pyiface/drivers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cloudsc_pyiface/drivers/cloudsc_pyiface.py b/src/cloudsc_pyiface/drivers/cloudsc_pyiface.py new file mode 100755 index 00000000..24045ee6 --- /dev/null +++ b/src/cloudsc_pyiface/drivers/cloudsc_pyiface.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +""" +Driver that executes Fortran implementation of the CLOUDSC dwarf using f90wrap/f2py +""" +from pathlib import Path +import click + +from pyiface import cloudsc_data +from pyiface.dynload import load_module + + +@click.command() +@click.option( + "--numomp", type=int, default=1, + help="Number of OpenMP threads used for the benchmark. Default: 1", +) +@click.option( + "--ngptot", type=int, default=100, + help="Total number of grid points (NGPTOT) used for the benchmark. Default: 100", +) +@click.option( + "--nproma", type=int, default=100, + help="Block sizes (NPROMA) used for the benchmark. Default: 100", +) +@click.option( + "--cloudsc-path", type=click.Path(exists=True), default=Path.cwd(), + help="Path to the Python-wrapped and compiled CLOUDSC module", +) +@click.option( + "--input-path", type=click.Path(exists=True), default=Path.cwd(), + help="Path to input and reference files; by default './'", +) +def main(numomp: int, ngptot: int, nproma: int, cloudsc_path, input_path) -> None: + """ + Python driver to execute IFS physics kernel (CLOUDSC). + + Performs the following tasks: + - loads input variables and parameters from .h5 file, + - invokes Fortran kernel computation, + - validates against reference results read from another .h5 file. + """ + + cloudsc_path = Path(cloudsc_path) + input_path = Path(input_path) + + # Dynamically load the Python-wrapped Fortran CLOUDSC module + clsc = load_module(module='cloudsc', modpath=Path(cloudsc_path)) + + # Defining common parameters + nlev = 137 + ndim = 5 + ngptotg = ngptot + nblocks = int( (ngptot / nproma) + min(ngptot % nproma, 1) ) + nclv = 5 # number of microphysics variables + npars = dict( + nlev=nlev, ngptot=ngptot, ngptotg=ngptotg, nproma=nproma, + nblocks=nblocks, ndim=ndim, nclv=nclv + ) + + # Allocate temporary and output fields + clsfields = cloudsc_data.define_fortran_fields( + nproma=nproma, nlev=nlev, nblocks=nblocks, clsc=clsc + ) + + # Get reference solution fields from file + ref_fields = cloudsc_data.load_reference_fields( + path=input_path/'reference.h5', clsc=clsc, **npars + ) + + # Get input data fields from file + cloudsc_data.load_input_parameters( + input_path/'input.h5', clsfields['ydecldp'], clsfields['ydephli'], + clsfields['ydomcst'], clsfields['ydoethf'] + ) + input_fort_fields = cloudsc_data.load_input_fortran_fields( + path=input_path/'input.h5', fields=clsfields, clsc=clsc, **npars + ) + + # Execute kernel via Python-wrapped, compiled Fortran driver + clsc.cloudsc_driver_mod.cloudsc_driver( + numomp, nproma, nlev, ngptot, ngptotg, + input_fort_fields['kfldx'], + input_fort_fields['PTSPHY'], + input_fort_fields['pt'], + input_fort_fields['pq'], + clsfields['buffer_tmp'], + clsfields['buffer_loc'], + input_fort_fields['pvfa'], + input_fort_fields['pvfl'], + input_fort_fields['pvfi'], + input_fort_fields['pdyna'], + input_fort_fields['pdynl'], + input_fort_fields['pdyni'], + input_fort_fields['phrsw'], + input_fort_fields['phrlw'], + input_fort_fields['pvervel'], + input_fort_fields['pap'], + input_fort_fields['paph'], + input_fort_fields['plsm'], + input_fort_fields['ldcum'], + input_fort_fields['ktype'], + input_fort_fields['plu'], + input_fort_fields['plude'], + input_fort_fields['psnde'], + input_fort_fields['pmfu'], + input_fort_fields['pmfd'], + input_fort_fields['pa'], + input_fort_fields['pclv'], + input_fort_fields['psupsat'], + input_fort_fields['plcrit_aer'], + input_fort_fields['picrit_aer'], + input_fort_fields['pre_ice'], + input_fort_fields['pccn'], + input_fort_fields['pnice'], + clsfields['pcovptot'], + clsfields['prainfrac_toprfz'], + clsfields['pfsqlf'], + clsfields['pfsqif'], + clsfields['pfcqnng'], + clsfields['pfcqlng'], + clsfields['pfsqrf'], + clsfields['pfsqsf'], + clsfields['pfcqrng'], + clsfields['pfcqsng'], + clsfields['pfsqltur'], + clsfields['pfsqitur'], + clsfields['pfplsl'], + clsfields['pfplsn'], + clsfields['pfhpsl'], + clsfields['pfhpsn'], + clsfields['ydomcst'], + clsfields['ydoethf'], + clsfields['ydecldp'], + ) + output_fields = cloudsc_data.convert_fortran_output_to_python (clsfields, **npars) + print ("Python-side validation:") + cloudsc_data.cloudsc_validate(output_fields, ref_fields) + + +if __name__ == "__main__": + main() diff --git a/src/cloudsc_pyiface/fortransrc/cloudsc.F90 b/src/cloudsc_pyiface/fortransrc/cloudsc.F90 new file mode 100644 index 00000000..195f6c80 --- /dev/null +++ b/src/cloudsc_pyiface/fortransrc/cloudsc.F90 @@ -0,0 +1,2902 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +SUBROUTINE CLOUDSC & + !---input + & (KIDIA, KFDIA, KLON, KLEV, & + & PTSPHY,& + & PT, PQ, & + & TENDENCY_TMP_T,TENDENCY_TMP_A,TENDENCY_TMP_Q,TENDENCY_TMP_CLD, & + & TENDENCY_LOC_T,TENDENCY_LOC_A,TENDENCY_LOC_Q,TENDENCY_LOC_CLD, & + & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, & + & PHRSW, PHRLW,& + & PVERVEL, PAP, PAPH,& + & PLSM, LDCUM, KTYPE, & + & PLU, PLUDE, PSNDE, PMFU, PMFD,& + !---prognostic fields + & PA,& + & PCLV, & + & PSUPSAT,& +!-- arrays for aerosol-cloud interactions +!!! & PQAER, KAER, & + & PLCRIT_AER,PICRIT_AER,& + & PRE_ICE,& + & PCCN, PNICE,& + !---diagnostic output + & PCOVPTOT, PRAINFRAC_TOPRFZ,& + !---resulting fluxes + & PFSQLF, PFSQIF , PFCQNNG, PFCQLNG,& + & PFSQRF, PFSQSF , PFCQRNG, PFCQSNG,& + & PFSQLTUR, PFSQITUR , & + & PFPLSL, PFPLSN, PFHPSL, PFHPSN, KFLDX, & + & YDCST, YDTHF, YDECLDP) + +!=============================================================================== +!**** *CLOUDSC* - ROUTINE FOR PARAMATERIZATION OF CLOUD PROCESSES +! FOR PROGNOSTIC CLOUD SCHEME +!! +! M.Tiedtke, C.Jakob, A.Tompkins, R.Forbes (E.C.M.W.F.) +!! +! PURPOSE +! ------- +! THIS ROUTINE UPDATES THE CONV/STRAT CLOUD FIELDS. +! THE FOLLOWING PROCESSES ARE CONSIDERED: +! - Detrainment of cloud water from convective updrafts +! - Evaporation/condensation of cloud water in connection +! with heating/cooling such as by subsidence/ascent +! - Erosion of clouds by turbulent mixing of cloud air +! with unsaturated environmental air +! - Deposition onto ice when liquid water present (Bergeron-Findeison) +! - Conversion of cloud water into rain (collision-coalescence) +! - Conversion of cloud ice to snow (aggregation) +! - Sedimentation of rain, snow and ice +! - Evaporation of rain and snow +! - Melting of snow and ice +! - Freezing of liquid and rain +! Note: Turbulent transports of s,q,u,v at cloud tops due to +! buoyancy fluxes and lw radiative cooling are treated in +! the VDF scheme +!! +! INTERFACE. +! ---------- +! *CLOUDSC* IS CALLED FROM *CALLPAR* +! THE ROUTINE TAKES ITS INPUT FROM THE LONG-TERM STORAGE: +! T,Q,L,PHI AND DETRAINMENT OF CLOUD WATER FROM THE +! CONVECTIVE CLOUDS (MASSFLUX CONVECTION SCHEME), BOUNDARY +! LAYER TURBULENT FLUXES OF HEAT AND MOISTURE, RADIATIVE FLUXES, +! OMEGA. +! IT RETURNS ITS OUTPUT TO: +! 1.MODIFIED TENDENCIES OF MODEL VARIABLES T AND Q +! AS WELL AS CLOUD VARIABLES L AND C +! 2.GENERATES PRECIPITATION FLUXES FROM STRATIFORM CLOUDS +!! +! EXTERNALS. +! ---------- +! NONE +!! +! MODIFICATIONS. +! ------------- +! M. TIEDTKE E.C.M.W.F. 8/1988, 2/1990 +! CH. JAKOB E.C.M.W.F. 2/1994 IMPLEMENTATION INTO IFS +! A.TOMPKINS E.C.M.W.F. 2002 NEW NUMERICS +! 01-05-22 : D.Salmond Safety modifications +! 02-05-29 : D.Salmond Optimisation +! 03-01-13 : J.Hague MASS Vector Functions J.Hague +! 03-10-01 : M.Hamrud Cleaning +! 04-12-14 : A.Tompkins New implicit solver and physics changes +! 04-12-03 : A.Tompkins & M.Ko"hler moist PBL +! G.Mozdzynski 09-Jan-2006 EXP security fix +! 19-01-09 : P.Bechtold Changed increased RCLDIFF value for KTYPE=2 +! 07-07-10 : A.Tompkins/R.Forbes 4-Phase flexible microphysics +! 01-03-11 : R.Forbes Mixed phase changes and tidy up +! 01-10-11 : R.Forbes Melt ice to rain, allow rain to freeze +! 01-10-11 : R.Forbes Limit supersat to avoid excessive values +! 31-10-11 : M.Ahlgrimm Add rain, snow and PEXTRA to DDH output +! 17-02-12 : F.Vana Simplified/optimized LU factorization +! 18-05-12 : F.Vana Cleaning + better support of sequential physics +! N.Semane+P.Bechtold 04-10-2012 Add RVRFACTOR factor for small planet +! 01-02-13 : R.Forbes New params of autoconv/acc,rain evap,snow riming +! 15-03-13 : F. Vana New dataflow + more tendencies from the first call +! K. Yessad (July 2014): Move some variables. +! F. Vana 05-Mar-2015 Support for single precision +! 15-01-15 : R.Forbes Added new options for snow evap & ice deposition +! 10-01-15 : R.Forbes New physics for rain freezing +! 23-10-14 : P. Bechtold remove zeroing of convection arrays +! +! SWITCHES. +! -------- +!! +! MODEL PARAMETERS +! ---------------- +! RCLDIFF: PARAMETER FOR EROSION OF CLOUDS +! RCLCRIT_SEA: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER SEA +! RCLCRIT_LAND: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER LAND +! RLCRITSNOW: THRESHOLD VALUE FOR SNOW AUTOCONVERSION +! RKCONV: PARAMETER FOR AUTOCONVERSION OF CLOUDS (KESSLER) +! RCLDMAX: MAXIMUM POSSIBLE CLW CONTENT (MASON,1971) +!! +! REFERENCES. +! ---------- +! TIEDTKE MWR 1993 +! JAKOB PhD 2000 +! GREGORY ET AL. QJRMS 2000 +! TOMPKINS ET AL. QJRMS 2007 +!! +!=============================================================================== + +USE PARKIND1 , ONLY : JPIM, JPRB +!USE YOMHOOK , ONLY : LHOOK, DR_HOOK +USE YOMPHYDER ,ONLY : STATE_TYPE +USE YOECLDP , ONLY : TECLDP, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, NCLV +USE YOMCST , ONLY : TOMCST +USE YOETHF , ONLY : TOETHF +IMPLICIT NONE + +!------------------------------------------------------------------------------- +! Declare input/output arguments +!------------------------------------------------------------------------------- + +! PLCRIT_AER : critical liquid mmr for rain autoconversion process +! PICRIT_AER : critical liquid mmr for snow autoconversion process +! PRE_LIQ : liq Re +! PRE_ICE : ice Re +! PCCN : liquid cloud condensation nuclei +! PNICE : ice number concentration (cf. CCN) + +REAL(KIND=JPRB) ,INTENT(IN) :: PLCRIT_AER(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PICRIT_AER(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PRE_ICE(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PCCN(KLON,KLEV) ! liquid cloud condensation nuclei +REAL(KIND=JPRB) ,INTENT(IN) :: PNICE(KLON,KLEV) ! ice number concentration (cf. CCN) + +INTEGER(KIND=JPIM),INTENT(IN) :: KLON ! Number of grid points +INTEGER(KIND=JPIM),INTENT(IN) :: KLEV ! Number of levels +INTEGER(KIND=JPIM),INTENT(IN) :: KIDIA +INTEGER(KIND=JPIM),INTENT(IN) :: KFDIA +REAL(KIND=JPRB) ,INTENT(IN) :: PTSPHY ! Physics timestep +REAL(KIND=JPRB) ,INTENT(IN) :: PT(KLON,KLEV) ! T at start of callpar +REAL(KIND=JPRB) ,INTENT(IN) :: PQ(KLON,KLEV) ! Q at start of callpar +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_T(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_Q(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_A(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_CLD(KLON,KLEV,NCLV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_TMP_T(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_TMP_Q(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_TMP_A(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_TMP_CLD(KLON,KLEV,NCLV) + +REAL(KIND=JPRB) ,INTENT(IN) :: PVFA(KLON,KLEV) ! CC from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFL(KLON,KLEV) ! Liq from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFI(KLON,KLEV) ! Ice from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNA(KLON,KLEV) ! CC from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNL(KLON,KLEV) ! Liq from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNI(KLON,KLEV) ! Liq from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PHRSW(KLON,KLEV) ! Short-wave heating rate +REAL(KIND=JPRB) ,INTENT(IN) :: PHRLW(KLON,KLEV) ! Long-wave heating rate +REAL(KIND=JPRB) ,INTENT(IN) :: PVERVEL(KLON,KLEV) !Vertical velocity +REAL(KIND=JPRB) ,INTENT(IN) :: PAP(KLON,KLEV) ! Pressure on full levels +REAL(KIND=JPRB) ,INTENT(IN) :: PAPH(KLON,KLEV+1)! Pressure on half levels +REAL(KIND=JPRB) ,INTENT(IN) :: PLSM(KLON) ! Land fraction (0-1) +LOGICAL ,INTENT(IN) :: LDCUM(KLON) ! Convection active +INTEGER(KIND=JPIM),INTENT(IN) :: KTYPE(KLON) ! Convection type 0,1,2 +REAL(KIND=JPRB) ,INTENT(IN) :: PLU(KLON,KLEV) ! Conv. condensate +REAL(KIND=JPRB) ,INTENT(INOUT) :: PLUDE(KLON,KLEV) ! Conv. detrained water +REAL(KIND=JPRB) ,INTENT(IN) :: PSNDE(KLON,KLEV) ! Conv. detrained snow +REAL(KIND=JPRB) ,INTENT(IN) :: PMFU(KLON,KLEV) ! Conv. mass flux up +REAL(KIND=JPRB) ,INTENT(IN) :: PMFD(KLON,KLEV) ! Conv. mass flux down +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KLON,KLEV) ! Original Cloud fraction (t) + +INTEGER(KIND=JPIM),INTENT(IN) :: KFLDX + +REAL(KIND=JPRB) ,INTENT(IN) :: PCLV(KLON,KLEV,NCLV) + + ! Supersat clipped at previous time level in SLTEND +REAL(KIND=JPRB) ,INTENT(IN) :: PSUPSAT(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(OUT) :: PCOVPTOT(KLON,KLEV) ! Precip fraction +REAL(KIND=JPRB) ,INTENT(OUT) :: PRAINFRAC_TOPRFZ(KLON) +! Flux diagnostics for DDH budget +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQLF(KLON,KLEV+1) ! Flux of liquid +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQIF(KLON,KLEV+1) ! Flux of ice +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQLNG(KLON,KLEV+1) ! -ve corr for liq +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQNNG(KLON,KLEV+1) ! -ve corr for ice +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQRF(KLON,KLEV+1) ! Flux diagnostics +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQSF(KLON,KLEV+1) ! for DDH, generic +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQRNG(KLON,KLEV+1) ! rain +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQSNG(KLON,KLEV+1) ! snow +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQLTUR(KLON,KLEV+1) ! liquid flux due to VDF +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQITUR(KLON,KLEV+1) ! ice flux due to VDF +REAL(KIND=JPRB) ,INTENT(OUT) :: PFPLSL(KLON,KLEV+1) ! liq+rain sedim flux +REAL(KIND=JPRB) ,INTENT(OUT) :: PFPLSN(KLON,KLEV+1) ! ice+snow sedim flux +REAL(KIND=JPRB) ,INTENT(OUT) :: PFHPSL(KLON,KLEV+1) ! Enthalpy flux for liq +REAL(KIND=JPRB) ,INTENT(OUT) :: PFHPSN(KLON,KLEV+1) ! Enthalp flux for ice + +!------------------------------------------------------------------------------- +! Declare local variables +!------------------------------------------------------------------------------- + +REAL(KIND=JPRB) :: & +! condensation and evaporation terms + & ZLCOND1(KLON), ZLCOND2(KLON),& + & ZLEVAP, ZLEROS,& + & ZLEVAPL(KLON), ZLEVAPI(KLON),& +! autoconversion terms + & ZRAINAUT(KLON), ZSNOWAUT(KLON), & + & ZLIQCLD(KLON), ZICECLD(KLON) +REAL(KIND=JPRB) :: ZFOKOOP(KLON), ZFOEALFA(KLON,KLEV+1) +REAL(KIND=JPRB) :: ZICENUCLEI(KLON) ! number concentration of ice nuclei + +REAL(KIND=JPRB) :: ZLICLD(KLON) +REAL(KIND=JPRB) :: ZACOND +REAL(KIND=JPRB) :: ZAEROS +REAL(KIND=JPRB) :: ZLFINALSUM(KLON) +REAL(KIND=JPRB) :: ZDQS(KLON) +REAL(KIND=JPRB) :: ZTOLD(KLON) +REAL(KIND=JPRB) :: ZQOLD(KLON) +REAL(KIND=JPRB) :: ZDTGDP(KLON) +REAL(KIND=JPRB) :: ZRDTGDP(KLON) +REAL(KIND=JPRB) :: ZTRPAUS(KLON) +REAL(KIND=JPRB) :: ZCOVPCLR(KLON) +REAL(KIND=JPRB) :: ZPRECLR +REAL(KIND=JPRB) :: ZCOVPTOT(KLON) +REAL(KIND=JPRB) :: ZCOVPMAX(KLON) +REAL(KIND=JPRB) :: ZQPRETOT(KLON) +REAL(KIND=JPRB) :: ZDPEVAP +REAL(KIND=JPRB) :: ZDTFORC +REAL(KIND=JPRB) :: ZDTDIAB +REAL(KIND=JPRB) :: ZTP1(KLON,KLEV) +REAL(KIND=JPRB) :: ZLDEFR(KLON) +REAL(KIND=JPRB) :: ZLDIFDT(KLON) +REAL(KIND=JPRB) :: ZDTGDPF(KLON) +REAL(KIND=JPRB) :: ZLCUST(KLON,NCLV) +REAL(KIND=JPRB) :: ZACUST(KLON) +REAL(KIND=JPRB) :: ZMF(KLON) + +REAL(KIND=JPRB) :: ZRHO(KLON) +REAL(KIND=JPRB) :: ZTMP1(KLON),ZTMP2(KLON),ZTMP3(KLON) +REAL(KIND=JPRB) :: ZTMP4(KLON),ZTMP5(KLON),ZTMP6(KLON),ZTMP7(KLON) +REAL(KIND=JPRB) :: ZALFAWM(KLON) + +! Accumulators of A,B,and C factors for cloud equations +REAL(KIND=JPRB) :: ZSOLAB(KLON) ! -ve implicit CC +REAL(KIND=JPRB) :: ZSOLAC(KLON) ! linear CC +REAL(KIND=JPRB) :: ZANEW +REAL(KIND=JPRB) :: ZANEWM1(KLON) + +REAL(KIND=JPRB) :: ZGDP(KLON) + +!---for flux calculation +REAL(KIND=JPRB) :: ZDA(KLON) +REAL(KIND=JPRB) :: ZLI(KLON,KLEV), ZA(KLON,KLEV) +REAL(KIND=JPRB) :: ZAORIG(KLON,KLEV) ! start of scheme value for CC + +LOGICAL :: LLFLAG(KLON) +LOGICAL :: LLO1 + +INTEGER(KIND=JPIM) :: ICALL, IK, JK, JL, JM, JN, JO, JLEN, IS + +REAL(KIND=JPRB) :: ZDP(KLON), ZPAPHD(KLON) + +REAL(KIND=JPRB) :: ZALFA +! & ZALFACU, ZALFALS +REAL(KIND=JPRB) :: ZALFAW +REAL(KIND=JPRB) :: ZBETA,ZBETA1 +!REAL(KIND=JPRB) :: ZBOTT +REAL(KIND=JPRB) :: ZCFPR +REAL(KIND=JPRB) :: ZCOR +REAL(KIND=JPRB) :: ZCDMAX +REAL(KIND=JPRB) :: ZMIN(KLON) +REAL(KIND=JPRB) :: ZLCONDLIM +REAL(KIND=JPRB) :: ZDENOM +REAL(KIND=JPRB) :: ZDPMXDT +REAL(KIND=JPRB) :: ZDPR +REAL(KIND=JPRB) :: ZDTDP +REAL(KIND=JPRB) :: ZE +REAL(KIND=JPRB) :: ZEPSEC +REAL(KIND=JPRB) :: ZFAC, ZFACI, ZFACW +REAL(KIND=JPRB) :: ZGDCP +REAL(KIND=JPRB) :: ZINEW +REAL(KIND=JPRB) :: ZLCRIT +REAL(KIND=JPRB) :: ZMFDN +REAL(KIND=JPRB) :: ZPRECIP +REAL(KIND=JPRB) :: ZQE +REAL(KIND=JPRB) :: ZQSAT, ZQTMST, ZRDCP +REAL(KIND=JPRB) :: ZRHC, ZSIG, ZSIGK +REAL(KIND=JPRB) :: ZWTOT +REAL(KIND=JPRB) :: ZZCO, ZZDL, ZZRH, ZZZDT, ZQADJ +REAL(KIND=JPRB) :: ZQNEW, ZTNEW +REAL(KIND=JPRB) :: ZRG_R,ZGDPH_R,ZCONS1,ZCOND,ZCONS1A +REAL(KIND=JPRB) :: ZLFINAL +REAL(KIND=JPRB) :: ZMELT +REAL(KIND=JPRB) :: ZEVAP +REAL(KIND=JPRB) :: ZFRZ +REAL(KIND=JPRB) :: ZVPLIQ, ZVPICE +REAL(KIND=JPRB) :: ZADD, ZBDD, ZCVDS, ZICE0, ZDEPOS +REAL(KIND=JPRB) :: ZSUPSAT(KLON) +REAL(KIND=JPRB) :: ZFALL +REAL(KIND=JPRB) :: ZRE_ICE +REAL(KIND=JPRB) :: ZRLDCP +REAL(KIND=JPRB) :: ZQP1ENV + +!---------------------------- +! Arrays for new microphysics +!---------------------------- +INTEGER(KIND=JPIM) :: IPHASE(NCLV) ! marker for water phase of each species + ! 0=vapour, 1=liquid, 2=ice + +INTEGER(KIND=JPIM) :: IMELT(NCLV) ! marks melting linkage for ice categories + ! ice->liquid, snow->rain + +LOGICAL :: LLFALL(NCLV) ! marks falling species + ! LLFALL=0, cloud cover must > 0 for zqx > 0 + ! LLFALL=1, no cloud needed, zqx can evaporate + +LOGICAL :: LLINDEX1(KLON,NCLV) ! index variable +LOGICAL :: LLINDEX3(KLON,NCLV,NCLV) ! index variable +REAL(KIND=JPRB) :: ZMAX +REAL(KIND=JPRB) :: ZRAT +INTEGER(KIND=JPIM) :: IORDER(KLON,NCLV) ! array for sorting explicit terms + +REAL(KIND=JPRB) :: ZLIQFRAC(KLON,KLEV) ! cloud liquid water fraction: ql/(ql+qi) +REAL(KIND=JPRB) :: ZICEFRAC(KLON,KLEV) ! cloud ice water fraction: qi/(ql+qi) +REAL(KIND=JPRB) :: ZQX(KLON,KLEV,NCLV) ! water variables +REAL(KIND=JPRB) :: ZQX0(KLON,KLEV,NCLV) ! water variables at start of scheme +REAL(KIND=JPRB) :: ZQXN(KLON,NCLV) ! new values for zqx at time+1 +REAL(KIND=JPRB) :: ZQXFG(KLON,NCLV) ! first guess values including precip +REAL(KIND=JPRB) :: ZQXNM1(KLON,NCLV) ! new values for zqx at time+1 at level above +REAL(KIND=JPRB) :: ZFLUXQ(KLON,NCLV) ! fluxes convergence of species (needed?) +! Keep the following for possible future total water variance scheme? +!REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature +!REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction +!REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance +!REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) +!REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + +REAL(KIND=JPRB) :: ZPFPLSX(KLON,KLEV+1,NCLV) ! generalized precipitation flux +REAL(KIND=JPRB) :: ZLNEG(KLON,KLEV,NCLV) ! for negative correction diagnostics +REAL(KIND=JPRB) :: ZMELTMAX(KLON) +REAL(KIND=JPRB) :: ZFRZMAX(KLON) +REAL(KIND=JPRB) :: ZICETOT(KLON) + +REAL(KIND=JPRB) :: ZQXN2D(KLON,KLEV,NCLV) ! water variables store + +REAL(KIND=JPRB) :: ZQSMIX(KLON,KLEV) ! diagnostic mixed phase saturation +!REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation +REAL(KIND=JPRB) :: ZQSLIQ(KLON,KLEV) ! liquid water saturation +REAL(KIND=JPRB) :: ZQSICE(KLON,KLEV) ! ice water saturation + +!REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH +!REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq +!REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + +REAL(KIND=JPRB) :: ZFOEEWMT(KLON,KLEV) +REAL(KIND=JPRB) :: ZFOEEW(KLON,KLEV) +REAL(KIND=JPRB) :: ZFOEELIQT(KLON,KLEV) +!REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + +REAL(KIND=JPRB) :: ZDQSLIQDT(KLON), ZDQSICEDT(KLON), ZDQSMIXDT(KLON) +REAL(KIND=JPRB) :: ZCORQSLIQ(KLON) +REAL(KIND=JPRB) :: ZCORQSICE(KLON) +!REAL(KIND=JPRB) :: ZCORQSBIN(KLON) +REAL(KIND=JPRB) :: ZCORQSMIX(KLON) +REAL(KIND=JPRB) :: ZEVAPLIMLIQ(KLON), ZEVAPLIMICE(KLON), ZEVAPLIMMIX(KLON) + +!------------------------------------------------------- +! SOURCE/SINK array for implicit and explicit terms +!------------------------------------------------------- +! a POSITIVE value entered into the arrays is a... +! Source of this variable +! | +! | Sink of this variable +! | | +! V V +! ZSOLQA(JL,IQa,IQb) = explicit terms +! ZSOLQB(JL,IQa,IQb) = implicit terms +! Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is +! a source of NCLDQL and a sink of IQV +! put 'magic' source terms such as PLUDE from +! detrainment into explicit source/sink array diagnognal +! ZSOLQA(NCLDQL,NCLDQL)= -PLUDE +! i.e. A positive value is a sink!????? weird... +!------------------------------------------------------- + +REAL(KIND=JPRB) :: ZSOLQA(KLON,NCLV,NCLV) ! explicit sources and sinks +REAL(KIND=JPRB) :: ZSOLQB(KLON,NCLV,NCLV) ! implicit sources and sinks + ! e.g. microphysical pathways between ice variables. +REAL(KIND=JPRB) :: ZQLHS(KLON,NCLV,NCLV) ! n x n matrix storing the LHS of implicit solver +REAL(KIND=JPRB) :: ZVQX(NCLV) ! fall speeds of three categories +REAL(KIND=JPRB) :: ZEXPLICIT, ZRATIO(KLON,NCLV), ZSINKSUM(KLON,NCLV) + +! for sedimentation source/sink terms +REAL(KIND=JPRB) :: ZFALLSINK(KLON,NCLV) +REAL(KIND=JPRB) :: ZFALLSRCE(KLON,NCLV) + +! for convection detrainment source and subsidence source/sink terms +REAL(KIND=JPRB) :: ZCONVSRCE(KLON,NCLV) +REAL(KIND=JPRB) :: ZCONVSINK(KLON,NCLV) + +! for supersaturation source term from previous timestep +REAL(KIND=JPRB) :: ZPSUPSATSRCE(KLON,NCLV) + +! Numerical fit to wet bulb temperature +REAL(KIND=JPRB),PARAMETER :: ZTW1 = 1329.31_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW2 = 0.0074615_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW3 = 0.85E5_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW4 = 40.637_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW5 = 275.0_JPRB + +REAL(KIND=JPRB) :: ZSUBSAT ! Subsaturation for snow melting term +REAL(KIND=JPRB) :: ZTDMTW0 ! Diff between dry-bulb temperature and + ! temperature when wet-bulb = 0degC + +! Variables for deposition term +REAL(KIND=JPRB) :: ZTCG ! Temperature dependent function for ice PSD +REAL(KIND=JPRB) :: ZFACX1I, ZFACX1S! PSD correction factor +REAL(KIND=JPRB) :: ZAPLUSB,ZCORRFAC,ZCORRFAC2,ZPR02,ZTERM1,ZTERM2 ! for ice dep +REAL(KIND=JPRB) :: ZCLDTOPDIST(KLON) ! Distance from cloud top +REAL(KIND=JPRB) :: ZINFACTOR ! No. of ice nuclei factor for deposition + +! Autoconversion/accretion/riming/evaporation +INTEGER(KIND=JPIM) :: IWARMRAIN +INTEGER(KIND=JPIM) :: IEVAPRAIN +INTEGER(KIND=JPIM) :: IEVAPSNOW +INTEGER(KIND=JPIM) :: IDEPICE +REAL(KIND=JPRB) :: ZRAINACC(KLON) +REAL(KIND=JPRB) :: ZRAINCLD(KLON) +REAL(KIND=JPRB) :: ZSNOWRIME(KLON) +REAL(KIND=JPRB) :: ZSNOWCLD(KLON) +REAL(KIND=JPRB) :: ZESATLIQ +REAL(KIND=JPRB) :: ZFALLCORR +REAL(KIND=JPRB) :: ZLAMBDA +REAL(KIND=JPRB) :: ZEVAP_DENOM +REAL(KIND=JPRB) :: ZCORR2 +REAL(KIND=JPRB) :: ZKA +REAL(KIND=JPRB) :: ZCONST +REAL(KIND=JPRB) :: ZTEMP + +! Rain freezing +LOGICAL :: LLRAINLIQ(KLON) ! True if majority of raindrops are liquid (no ice core) + +!---------------------------- +! End: new microphysics +!---------------------------- + +!---------------------- +! SCM budget statistics +!---------------------- +REAL(KIND=JPRB) :: ZRAIN + +REAL(KIND=JPRB) :: Z_TMP1(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP2(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP3(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP4(KFDIA-KIDIA+1) +!REAL(KIND=JPRB) :: Z_TMP5(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP6(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMP7(KFDIA-KIDIA+1) +REAL(KIND=JPRB) :: Z_TMPK(KFDIA-KIDIA+1,KLEV) +!REAL(KIND=JPRB) :: ZCON1,ZCON2 +REAL(KIND=JPRB) :: ZHOOK_HANDLE +REAL(KIND=JPRB) :: ZTMPL,ZTMPI,ZTMPA + +REAL(KIND=JPRB) :: ZMM,ZRR +REAL(KIND=JPRB) :: ZRG(KLON) + +REAL(KIND=JPRB) :: ZBUDCC(KLON,KFLDX) ! extra fields +REAL(KIND=JPRB) :: ZBUDL(KLON,KFLDX) ! extra fields +REAL(KIND=JPRB) :: ZBUDI(KLON,KFLDX) ! extra fields + +REAL(KIND=JPRB) :: ZZSUM, ZZRATIO +REAL(KIND=JPRB) :: ZEPSILON + +REAL(KIND=JPRB) :: ZCOND1, ZQP +TYPE(TOMCST) ,INTENT(IN) :: YDCST +TYPE(TOETHF) ,INTENT(IN) :: YDTHF +TYPE(TECLDP) ,INTENT(IN) :: YDECLDP + +#include "abor1.intfb.h" + +!DIR$ VFUNCTION EXPHF +#include "fcttre.ycst.h" +#include "fccld.ydthf.h" + +!=============================================================================== +!IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) +ASSOCIATE( & + & LAERICEAUTO=>YDECLDP%LAERICEAUTO, LAERICESED=>YDECLDP%LAERICESED, & + & LAERLIQAUTOLSP=>YDECLDP%LAERLIQAUTOLSP, LAERLIQCOLL=>YDECLDP%LAERLIQCOLL, & + & LCLDBUDGET=>YDECLDP%LCLDBUDGET, NCLDTOP=>YDECLDP%NCLDTOP, & + & NSSOPT=>YDECLDP%NSSOPT, RAMID=>YDECLDP%RAMID, RAMIN=>YDECLDP%RAMIN, & + & RCCN=>YDECLDP%RCCN, RCLCRIT_LAND=>YDECLDP%RCLCRIT_LAND, & + & RCLCRIT_SEA=>YDECLDP%RCLCRIT_SEA, RCLDIFF=>YDECLDP%RCLDIFF, & + & RCLDIFF_CONVI=>YDECLDP%RCLDIFF_CONVI, RCLDTOPCF=>YDECLDP%RCLDTOPCF, & + & RCL_APB1=>YDECLDP%RCL_APB1, RCL_APB2=>YDECLDP%RCL_APB2, & + & RCL_APB3=>YDECLDP%RCL_APB3, RCL_CDENOM1=>YDECLDP%RCL_CDENOM1, & + & RCL_CDENOM2=>YDECLDP%RCL_CDENOM2, RCL_CDENOM3=>YDECLDP%RCL_CDENOM3, & + & RCL_CONST1I=>YDECLDP%RCL_CONST1I, RCL_CONST1R=>YDECLDP%RCL_CONST1R, & + & RCL_CONST1S=>YDECLDP%RCL_CONST1S, RCL_CONST2I=>YDECLDP%RCL_CONST2I, & + & RCL_CONST2R=>YDECLDP%RCL_CONST2R, RCL_CONST2S=>YDECLDP%RCL_CONST2S, & + & RCL_CONST3I=>YDECLDP%RCL_CONST3I, RCL_CONST3R=>YDECLDP%RCL_CONST3R, & + & RCL_CONST3S=>YDECLDP%RCL_CONST3S, RCL_CONST4I=>YDECLDP%RCL_CONST4I, & + & RCL_CONST4R=>YDECLDP%RCL_CONST4R, RCL_CONST4S=>YDECLDP%RCL_CONST4S, & + & RCL_CONST5I=>YDECLDP%RCL_CONST5I, RCL_CONST5R=>YDECLDP%RCL_CONST5R, & + & RCL_CONST5S=>YDECLDP%RCL_CONST5S, RCL_CONST6I=>YDECLDP%RCL_CONST6I, & + & RCL_CONST6R=>YDECLDP%RCL_CONST6R, RCL_CONST6S=>YDECLDP%RCL_CONST6S, & + & RCL_CONST7S=>YDECLDP%RCL_CONST7S, RCL_CONST8S=>YDECLDP%RCL_CONST8S, & + & RCL_FAC1=>YDECLDP%RCL_FAC1, RCL_FAC2=>YDECLDP%RCL_FAC2, & + & RCL_FZRAB=>YDECLDP%RCL_FZRAB, RCL_KA273=>YDECLDP%RCL_KA273, & + & RCL_KKAAC=>YDECLDP%RCL_KKAAC, RCL_KKAAU=>YDECLDP%RCL_KKAAU, & + & RCL_KKBAC=>YDECLDP%RCL_KKBAC, RCL_KKBAUN=>YDECLDP%RCL_KKBAUN, & + & RCL_KKBAUQ=>YDECLDP%RCL_KKBAUQ, & + & RCL_KK_CLOUD_NUM_LAND=>YDECLDP%RCL_KK_CLOUD_NUM_LAND, & + & RCL_KK_CLOUD_NUM_SEA=>YDECLDP%RCL_KK_CLOUD_NUM_SEA, RCL_X3I=>YDECLDP%RCL_X3I, & + & RCOVPMIN=>YDECLDP%RCOVPMIN, RDENSREF=>YDECLDP%RDENSREF, & + & RDEPLIQREFDEPTH=>YDECLDP%RDEPLIQREFDEPTH, & + & RDEPLIQREFRATE=>YDECLDP%RDEPLIQREFRATE, RICEHI1=>YDECLDP%RICEHI1, & + & RICEHI2=>YDECLDP%RICEHI2, RICEINIT=>YDECLDP%RICEINIT, RKCONV=>YDECLDP%RKCONV, & + & RKOOPTAU=>YDECLDP%RKOOPTAU, RLCRITSNOW=>YDECLDP%RLCRITSNOW, & + & RLMIN=>YDECLDP%RLMIN, RNICE=>YDECLDP%RNICE, RPECONS=>YDECLDP%RPECONS, & + & RPRC1=>YDECLDP%RPRC1, RPRECRHMAX=>YDECLDP%RPRECRHMAX, & + & RSNOWLIN1=>YDECLDP%RSNOWLIN1, RSNOWLIN2=>YDECLDP%RSNOWLIN2, & + & RTAUMEL=>YDECLDP%RTAUMEL, RTHOMO=>YDECLDP%RTHOMO, RVICE=>YDECLDP%RVICE, & + & RVRAIN=>YDECLDP%RVRAIN, RVRFACTOR=>YDECLDP%RVRFACTOR, & + & RVSNOW=>YDECLDP%RVSNOW, RG=>YDCST%RG, RD=>YDCST%RD, & + & RCPD=>YDCST%RCPD, RETV=>YDCST%RETV, RLVTT=>YDCST%RLVTT, & + & RLSTT=>YDCST%RLSTT, RLMLT=>YDCST%RLMLT, RTT=>YDCST%RTT, & + & RV=>YDCST%RV, R4LES=>YDTHF%R4LES, R4IES=>YDTHF%R4IES, & + & R5LES=>YDTHF%R5LES, R5IES=>YDTHF%R5IES, RALVDCP=>YDTHF%RALVDCP, & + & RALSDCP=>YDTHF%RALSDCP, RALFDCP=>YDTHF%RALFDCP ) + +!=============================================================================== +! 0.0 Beginning of timestep book-keeping +!---------------------------------------------------------------------- + +!###################################################################### +! 0. *** SET UP CONSTANTS *** +!###################################################################### + +ZEPSILON=100._JPRB*EPSILON(ZEPSILON) + +! --------------------------------------------------------------------- +! Set version of warm-rain autoconversion/accretion +! IWARMRAIN = 1 ! Sundquist +! IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) +! --------------------------------------------------------------------- +IWARMRAIN = 2 +! --------------------------------------------------------------------- +! Set version of rain evaporation +! IEVAPRAIN = 1 ! Sundquist +! IEVAPRAIN = 2 ! Abel and Boutle (2013) +! --------------------------------------------------------------------- +IEVAPRAIN = 2 +! --------------------------------------------------------------------- +! Set version of snow evaporation +! IEVAPSNOW = 1 ! Sundquist +! IEVAPSNOW = 2 ! New +! --------------------------------------------------------------------- +IEVAPSNOW = 1 +! --------------------------------------------------------------------- +! Set version of ice deposition +! IDEPICE = 1 ! Rotstayn (2001) +! IDEPICE = 2 ! New +! --------------------------------------------------------------------- +IDEPICE = 1 + +! --------------------- +! Some simple constants +! --------------------- +ZQTMST = 1.0_JPRB/PTSPHY +ZGDCP = RG/RCPD +ZRDCP = RD/RCPD +ZCONS1A = RCPD/(RLMLT*RG*RTAUMEL) +ZEPSEC = 1.E-14_JPRB +ZRG_R = 1.0_JPRB/RG +ZRLDCP = 1.0_JPRB/(RALSDCP-RALVDCP) + +! Note: Defined in module/yoecldp.F90 +! NCLDQL=1 ! liquid cloud water +! NCLDQI=2 ! ice cloud water +! NCLDQR=3 ! rain water +! NCLDQS=4 ! snow +! NCLDQV=5 ! vapour + +! ----------------------------------------------- +! Define species phase, 0=vapour, 1=liquid, 2=ice +! ----------------------------------------------- +IPHASE(NCLDQV)=0 +IPHASE(NCLDQL)=1 +IPHASE(NCLDQR)=1 +IPHASE(NCLDQI)=2 +IPHASE(NCLDQS)=2 + +! --------------------------------------------------- +! Set up melting/freezing index, +! if an ice category melts/freezes, where does it go? +! --------------------------------------------------- +IMELT(NCLDQV)=-99 +IMELT(NCLDQL)=NCLDQI +IMELT(NCLDQR)=NCLDQS +IMELT(NCLDQI)=NCLDQR +IMELT(NCLDQS)=NCLDQR + +! ----------------------------------------------- +! INITIALIZATION OF OUTPUT TENDENCIES +! ----------------------------------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA +! tendency_loc%T(JL,JK)=0.0_JPRB +! tendency_loc%q(JL,JK)=0.0_JPRB +! tendency_loc%a(JL,JK)=0.0_JPRB + TENDENCY_LOC_T(JL,JK)=0.0_JPRB + TENDENCY_LOC_Q(JL,JK)=0.0_JPRB + TENDENCY_LOC_A(JL,JK)=0.0_JPRB + ENDDO +ENDDO +DO JM=1,NCLV-1 + DO JK=1,KLEV + DO JL=KIDIA,KFDIA +! tendency_loc%cld(JL,JK,JM)=0.0_JPRB + TENDENCY_LOC_CLD(JL,JK,JM)=0.0_JPRB + ENDDO + ENDDO +ENDDO + +! ------------------------- +! set up fall speeds in m/s +! ------------------------- +ZVQX(NCLDQV)=0.0_JPRB +ZVQX(NCLDQL)=0.0_JPRB +ZVQX(NCLDQI)=RVICE +ZVQX(NCLDQR)=RVRAIN +ZVQX(NCLDQS)=RVSNOW +LLFALL(:)=.FALSE. +DO JM=1,NCLV + IF (ZVQX(JM)>0.0_JPRB) LLFALL(JM)=.TRUE. ! falling species +ENDDO +! Set LLFALL to false for ice (but ice still sediments!) +! Need to rationalise this at some point +LLFALL(NCLDQI)=.FALSE. + + +!###################################################################### +! 1. *** INITIAL VALUES FOR VARIABLES *** +!###################################################################### + + +! ---------------------- +! non CLV initialization +! ---------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA +! ZTP1(JL,JK) = PT(JL,JK)+PTSPHY*tendency_tmp%T(JL,JK) +! ZQX(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*tendency_tmp%q(JL,JK) +! ZQX0(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*tendency_tmp%q(JL,JK) +! ZA(JL,JK) = PA(JL,JK)+PTSPHY*tendency_tmp%a(JL,JK) +! ZAORIG(JL,JK) = PA(JL,JK)+PTSPHY*tendency_tmp%a(JL,JK) + ZTP1(JL,JK) = PT(JL,JK)+PTSPHY*TENDENCY_TMP_T(JL,JK) + ZQX(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*TENDENCY_TMP_Q(JL,JK) + ZQX0(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*TENDENCY_TMP_Q(JL,JK) + ZA(JL,JK) = PA(JL,JK)+PTSPHY*TENDENCY_TMP_A(JL,JK) + ZAORIG(JL,JK) = PA(JL,JK)+PTSPHY*TENDENCY_TMP_A(JL,JK) + ENDDO +ENDDO + +! ------------------------------------- +! initialization for CLV family +! ------------------------------------- +DO JM=1,NCLV-1 + DO JK=1,KLEV + DO JL=KIDIA,KFDIA +! ZQX(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*tendency_tmp%cld(JL,JK,JM) +! ZQX0(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*tendency_tmp%cld(JL,JK,JM) + ZQX(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*TENDENCY_TMP_CLD(JL,JK,JM) + ZQX0(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*TENDENCY_TMP_CLD(JL,JK,JM) + ENDDO + ENDDO +ENDDO + +!------------- +! zero arrays +!------------- +ZPFPLSX(:,:,:) = 0.0_JPRB ! precip fluxes +ZQXN2D(:,:,:) = 0.0_JPRB ! end of timestep values in 2D +ZLNEG(:,:,:) = 0.0_JPRB ! negative input check +PRAINFRAC_TOPRFZ(:) =0.0_JPRB ! rain fraction at top of refreezing layer +LLRAINLIQ(:) = .TRUE. ! Assume all raindrops are liquid initially + +! ---------------------------------------------------- +! Tidy up very small cloud cover or total cloud water +! ---------------------------------------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + IF (ZQX(JL,JK,NCLDQL)+ZQX(JL,JK,NCLDQI)273K + !--------------------------------------------- + ZALFA=FOEDELTA(ZTP1(JL,JK)) + ZFOEEW(JL,JK)=MIN((ZALFA*FOEELIQ(ZTP1(JL,JK))+ & + & (1.0_JPRB-ZALFA)*FOEEICE(ZTP1(JL,JK)))/PAP(JL,JK),0.5_JPRB) + ZFOEEW(JL,JK)=MIN(0.5_JPRB,ZFOEEW(JL,JK)) + ZQSICE(JL,JK)=ZFOEEW(JL,JK)/(1.0_JPRB-RETV*ZFOEEW(JL,JK)) + + !---------------------------------- + ! liquid water saturation + !---------------------------------- + ZFOEELIQT(JL,JK)=MIN(FOEELIQ(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + ZQSLIQ(JL,JK)=ZFOEELIQT(JL,JK) + ZQSLIQ(JL,JK)=ZQSLIQ(JL,JK)/(1.0_JPRB-RETV*ZQSLIQ(JL,JK)) + +! !---------------------------------- +! ! ice water saturation +! !---------------------------------- +! ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) +! ZQSICE(JL,JK)=ZFOEEICET(JL,JK) +! ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + ENDDO + +ENDDO + +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + + + !------------------------------------------ + ! Ensure cloud fraction is between 0 and 1 + !------------------------------------------ + ZA(JL,JK)=MAX(0.0_JPRB,MIN(1.0_JPRB,ZA(JL,JK))) + + !------------------------------------------------------------------- + ! Calculate liq/ice fractions (no longer a diagnostic relationship) + !------------------------------------------------------------------- + ZLI(JL,JK)=ZQX(JL,JK,NCLDQL)+ZQX(JL,JK,NCLDQI) + IF (ZLI(JL,JK)>RLMIN) THEN + ZLIQFRAC(JL,JK)=ZQX(JL,JK,NCLDQL)/ZLI(JL,JK) + ZICEFRAC(JL,JK)=1.0_JPRB-ZLIQFRAC(JL,JK) + ELSE + ZLIQFRAC(JL,JK)=0.0_JPRB + ZICEFRAC(JL,JK)=0.0_JPRB + ENDIF + + ENDDO +ENDDO + +!###################################################################### +! 2. *** CONSTANTS AND PARAMETERS *** +!###################################################################### +! Calculate L in updrafts of bl-clouds +! Specify QS, P/PS for tropopause (for c2) +! And initialize variables +!------------------------------------------ + +!--------------------------------- +! Find tropopause level (ZTRPAUS) +!--------------------------------- +DO JL=KIDIA,KFDIA + ZTRPAUS(JL)=0.1_JPRB + ZPAPHD(JL)=1.0_JPRB/PAPH(JL,KLEV+1) +ENDDO +DO JK=1,KLEV-1 + DO JL=KIDIA,KFDIA + ZSIG=PAP(JL,JK)*ZPAPHD(JL) + IF (ZSIG>0.1_JPRB.AND.ZSIG<0.4_JPRB.AND.ZTP1(JL,JK)>ZTP1(JL,JK+1)) THEN + ZTRPAUS(JL)=ZSIG + ENDIF + ENDDO +ENDDO + +!----------------------------- +! Reset single level variables +!----------------------------- + +ZANEWM1(:) = 0.0_JPRB +ZDA(:) = 0.0_JPRB +ZCOVPCLR(:) = 0.0_JPRB +ZCOVPMAX(:) = 0.0_JPRB +ZCOVPTOT(:) = 0.0_JPRB +ZCLDTOPDIST(:) = 0.0_JPRB + +!###################################################################### +! 3. *** PHYSICS *** +!###################################################################### + + +!---------------------------------------------------------------------- +! START OF VERTICAL LOOP +!---------------------------------------------------------------------- + +DO JK=NCLDTOP,KLEV + +!---------------------------------------------------------------------- +! 3.0 INITIALIZE VARIABLES +!---------------------------------------------------------------------- + + !--------------------------------- + ! First guess microphysics + !--------------------------------- + DO JM=1,NCLV + DO JL=KIDIA,KFDIA + ZQXFG(JL,JM)=ZQX(JL,JK,JM) + ENDDO + ENDDO + + !--------------------------------- + ! Set KLON arrays to zero + !--------------------------------- + + ZLICLD(:) = 0.0_JPRB + ZRAINAUT(:) = 0.0_JPRB ! currently needed for diags + ZRAINACC(:) = 0.0_JPRB ! currently needed for diags + ZSNOWAUT(:) = 0.0_JPRB ! needed + ZLDEFR(:) = 0.0_JPRB + ZACUST(:) = 0.0_JPRB ! set later when needed + ZQPRETOT(:) = 0.0_JPRB + ZLFINALSUM(:)= 0.0_JPRB + + ! Required for first guess call + ZLCOND1(:) = 0.0_JPRB + ZLCOND2(:) = 0.0_JPRB + ZSUPSAT(:) = 0.0_JPRB + ZLEVAPL(:) = 0.0_JPRB + ZLEVAPI(:) = 0.0_JPRB + + !------------------------------------- + ! solvers for cloud fraction + !------------------------------------- + ZSOLAB(:) = 0.0_JPRB + ZSOLAC(:) = 0.0_JPRB + + !------------------------------------------ + ! reset matrix so missing pathways are set + !------------------------------------------ + ZSOLQB(:,:,:) = 0.0_JPRB + ZSOLQA(:,:,:) = 0.0_JPRB + + !---------------------------------- + ! reset new microphysics variables + !---------------------------------- + ZFALLSRCE(:,:) = 0.0_JPRB + ZFALLSINK(:,:) = 0.0_JPRB + ZCONVSRCE(:,:) = 0.0_JPRB + ZCONVSINK(:,:) = 0.0_JPRB + ZPSUPSATSRCE(:,:) = 0.0_JPRB + ZRATIO(:,:) = 0.0_JPRB + ZICETOT(:) = 0.0_JPRB + + DO JL=KIDIA,KFDIA + + !------------------------- + ! derived variables needed + !------------------------- + + ZDP(JL) = PAPH(JL,JK+1)-PAPH(JL,JK) ! dp + ZGDP(JL) = RG/ZDP(JL) ! g/dp + ZRHO(JL) = PAP(JL,JK)/(RD*ZTP1(JL,JK)) ! p/RT air density + + ZDTGDP(JL) = PTSPHY*ZGDP(JL) ! dt g/dp + ZRDTGDP(JL) = ZDP(JL)*(1.0_JPRB/(PTSPHY*RG)) ! 1/(dt g/dp) + + IF (JK>1) ZDTGDPF(JL) = PTSPHY*RG/(PAP(JL,JK)-PAP(JL,JK-1)) + + !------------------------------------ + ! Calculate dqs/dT correction factor + !------------------------------------ + ! Reminder: RETV=RV/RD-1 + + ! liquid + ZFACW = R5LES/((ZTP1(JL,JK)-R4LES)**2) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEELIQT(JL,JK)) + ZDQSLIQDT(JL) = ZFACW*ZCOR*ZQSLIQ(JL,JK) + ZCORQSLIQ(JL) = 1.0_JPRB+RALVDCP*ZDQSLIQDT(JL) + + ! ice + ZFACI = R5IES/((ZTP1(JL,JK)-R4IES)**2) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEEW(JL,JK)) + ZDQSICEDT(JL) = ZFACI*ZCOR*ZQSICE(JL,JK) + ZCORQSICE(JL) = 1.0_JPRB+RALSDCP*ZDQSICEDT(JL) + + ! diagnostic mixed + ZALFAW = ZFOEALFA(JL,JK) + ZALFAWM(JL) = ZALFAW + ZFAC = ZALFAW*ZFACW+(1.0_JPRB-ZALFAW)*ZFACI + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEEWMT(JL,JK)) + ZDQSMIXDT(JL) = ZFAC*ZCOR*ZQSMIX(JL,JK) + ZCORQSMIX(JL) = 1.0_JPRB+FOELDCPM(ZTP1(JL,JK))*ZDQSMIXDT(JL) + + ! evaporation/sublimation limits + ZEVAPLIMMIX(JL) = MAX((ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSMIX(JL),0.0_JPRB) + ZEVAPLIMLIQ(JL) = MAX((ZQSLIQ(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSLIQ(JL),0.0_JPRB) + ZEVAPLIMICE(JL) = MAX((ZQSICE(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSICE(JL),0.0_JPRB) + + !-------------------------------- + ! in-cloud consensate amount + !-------------------------------- + ZTMPA = 1.0_JPRB/MAX(ZA(JL,JK),ZEPSEC) + ZLIQCLD(JL) = ZQX(JL,JK,NCLDQL)*ZTMPA + ZICECLD(JL) = ZQX(JL,JK,NCLDQI)*ZTMPA + ZLICLD(JL) = ZLIQCLD(JL)+ZICECLD(JL) + + ENDDO + + !------------------------------------------------ + ! Evaporate very small amounts of liquid and ice + !------------------------------------------------ + DO JL=KIDIA,KFDIA + + IF (ZQX(JL,JK,NCLDQL) < RLMIN) THEN + ZSOLQA(JL,NCLDQV,NCLDQL) = ZQX(JL,JK,NCLDQL) + ZSOLQA(JL,NCLDQL,NCLDQV) = -ZQX(JL,JK,NCLDQL) + ENDIF + + IF (ZQX(JL,JK,NCLDQI) < RLMIN) THEN + ZSOLQA(JL,NCLDQV,NCLDQI) = ZQX(JL,JK,NCLDQI) + ZSOLQA(JL,NCLDQI,NCLDQV) = -ZQX(JL,JK,NCLDQI) + ENDIF + + ENDDO + + !--------------------------------------------------------------------- + ! 3.1 ICE SUPERSATURATION ADJUSTMENT + !--------------------------------------------------------------------- + ! Note that the supersaturation adjustment is made with respect to + ! liquid saturation: when T>0C + ! ice saturation: when T<0C + ! with an adjustment made to allow for ice + ! supersaturation in the clear sky + ! Note also that the KOOP factor automatically clips the supersaturation + ! to a maximum set by the liquid water saturation mixing ratio + ! important for temperatures near to but below 0C + !----------------------------------------------------------------------- + +!DIR$ NOFUSION + DO JL=KIDIA,KFDIA + + !----------------------------------- + ! 3.1.1 Supersaturation limit (from Koop) + !----------------------------------- + ! Needs to be set for all temperatures + ZFOKOOP(JL)=FOKOOP(ZTP1(JL,JK)) + ENDDO + DO JL=KIDIA,KFDIA + + IF (ZTP1(JL,JK)>=RTT .OR. NSSOPT==0) THEN + ZFAC = 1.0_JPRB + ZFACI = 1.0_JPRB + ELSE + ZFAC = ZA(JL,JK)+ZFOKOOP(JL)*(1.0_JPRB-ZA(JL,JK)) + ZFACI = PTSPHY/RKOOPTAU + ENDIF + + !------------------------------------------------------------------- + ! 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + ! correction factor + ! [#Note: QSICE or QSLIQ] + !------------------------------------------------------------------- + + ! Calculate supersaturation to add to cloud + IF (ZA(JL,JK) > 1.0_JPRB-RAMIN) THEN + ZSUPSAT(JL) = MAX((ZQX(JL,JK,NCLDQV)-ZFAC*ZQSICE(JL,JK))/ZCORQSICE(JL)& + & ,0.0_JPRB) + ELSE + ! Calculate environmental humidity supersaturation + ZQP1ENV = (ZQX(JL,JK,NCLDQV) - ZA(JL,JK)*ZQSICE(JL,JK))/ & + & MAX(1.0_JPRB-ZA(JL,JK),ZEPSILON) + !& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + ZSUPSAT(JL) = MAX((1.0_JPRB-ZA(JL,JK))*(ZQP1ENV-ZFAC*ZQSICE(JL,JK))& + & /ZCORQSICE(JL),0.0_JPRB) + ENDIF + + !------------------------------------------------------------------- + ! Here the supersaturation is turned into liquid water + ! However, if the temperature is below the threshold for homogeneous + ! freezing then the supersaturation is turned instantly to ice. + !-------------------------------------------------------------------- + + IF (ZSUPSAT(JL) > ZEPSEC) THEN + + IF (ZTP1(JL,JK) > RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)+ZSUPSAT(JL) + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)-ZSUPSAT(JL) + ! Include liquid in first guess + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZSUPSAT(JL) + ELSE + ! Turn supersaturation into ice water + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)+ZSUPSAT(JL) + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)-ZSUPSAT(JL) + ! Add ice to first guess for deposition term + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZSUPSAT(JL) + ENDIF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC(JL) = (1.0_JPRB-ZA(JL,JK))*ZFACI + + ENDIF + + !------------------------------------------------------- + ! 3.1.3 Include supersaturation from previous timestep + ! (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + !------------------------------------------------------- + IF (PSUPSAT(JL,JK)>ZEPSEC) THEN + IF (ZTP1(JL,JK) > RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(JL,NCLDQL,NCLDQL) = ZSOLQA(JL,NCLDQL,NCLDQL)+PSUPSAT(JL,JK) + ZPSUPSATSRCE(JL,NCLDQL) = PSUPSAT(JL,JK) + ! Add liquid to first guess for deposition term + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+PSUPSAT(JL,JK) + ! Store cloud budget diagnostics if required + ELSE + ! Turn supersaturation into ice water + ZSOLQA(JL,NCLDQI,NCLDQI) = ZSOLQA(JL,NCLDQI,NCLDQI)+PSUPSAT(JL,JK) + ZPSUPSATSRCE(JL,NCLDQI) = PSUPSAT(JL,JK) + ! Add ice to first guess for deposition term + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+PSUPSAT(JL,JK) + ! Store cloud budget diagnostics if required + ENDIF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC(JL)=(1.0_JPRB-ZA(JL,JK))*ZFACI + ! Store cloud budget diagnostics if required + ENDIF + + ENDDO ! on JL + + !--------------------------------------------------------------------- + ! 3.2 DETRAINMENT FROM CONVECTION + !--------------------------------------------------------------------- + ! * Diagnostic T-ice/liq split retained for convection + ! Note: This link is now flexible and a future convection + ! scheme can detrain explicit seperate budgets of: + ! cloud water, ice, rain and snow + ! * There is no (1-ZA) multiplier term on the cloud detrainment + ! term, since is now written in mass-flux terms + ! [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + !--------------------------------------------------------------------- + IF (JK < KLEV .AND. JK>=NCLDTOP) THEN + + DO JL=KIDIA,KFDIA + + PLUDE(JL,JK)=PLUDE(JL,JK)*ZDTGDP(JL) + + IF(LDCUM(JL).AND.PLUDE(JL,JK) > RLMIN.AND.PLU(JL,JK+1)> ZEPSEC) THEN + + ZSOLAC(JL)=ZSOLAC(JL)+PLUDE(JL,JK)/PLU(JL,JK+1) + ! *diagnostic temperature split* + ZALFAW = ZFOEALFA(JL,JK) + ZCONVSRCE(JL,NCLDQL) = ZALFAW*PLUDE(JL,JK) + ZCONVSRCE(JL,NCLDQI) = (1.0_JPRB-ZALFAW)*PLUDE(JL,JK) + ZSOLQA(JL,NCLDQL,NCLDQL) = ZSOLQA(JL,NCLDQL,NCLDQL)+ZCONVSRCE(JL,NCLDQL) + ZSOLQA(JL,NCLDQI,NCLDQI) = ZSOLQA(JL,NCLDQI,NCLDQI)+ZCONVSRCE(JL,NCLDQI) + + ELSE + + PLUDE(JL,JK)=0.0_JPRB + + ENDIF + ! *convective snow detrainment source + IF (LDCUM(JL)) ZSOLQA(JL,NCLDQS,NCLDQS) = ZSOLQA(JL,NCLDQS,NCLDQS) + PSNDE(JL,JK)*ZDTGDP(JL) + + ENDDO + + ENDIF ! JK NCLDTOP) THEN + + DO JL=KIDIA,KFDIA + ZMF(JL)=MAX(0.0_JPRB,(PMFU(JL,JK)+PMFD(JL,JK))*ZDTGDP(JL)) + ZACUST(JL)=ZMF(JL)*ZANEWM1(JL) + ENDDO + + DO JM=1,NCLV + IF (.NOT.LLFALL(JM).AND.IPHASE(JM)>0) THEN + DO JL=KIDIA,KFDIA + ZLCUST(JL,JM)=ZMF(JL)*ZQXNM1(JL,JM) + ! record total flux for enthalpy budget: + ZCONVSRCE(JL,JM)=ZCONVSRCE(JL,JM)+ZLCUST(JL,JM) + ENDDO + ENDIF + ENDDO + + ! Now have to work out how much liquid evaporates at arrival point + ! since there is no prognostic memory for in-cloud humidity, i.e. + ! we always assume cloud is saturated. + + DO JL=KIDIA,KFDIA + ZDTDP=ZRDCP*0.5_JPRB*(ZTP1(JL,JK-1)+ZTP1(JL,JK))/PAPH(JL,JK) + ZDTFORC = ZDTDP*(PAP(JL,JK)-PAP(JL,JK-1)) + ![#Note: Diagnostic mixed phase should be replaced below] + ZDQS(JL)=ZANEWM1(JL)*ZDTFORC*ZDQSMIXDT(JL) + ENDDO + + DO JM=1,NCLV + IF (.NOT.LLFALL(JM).AND.IPHASE(JM)>0) THEN + DO JL=KIDIA,KFDIA + ZLFINAL=MAX(0.0_JPRB,ZLCUST(JL,JM)-ZDQS(JL)) !lim to zero + ! no supersaturation allowed incloud ---V + ZEVAP=MIN((ZLCUST(JL,JM)-ZLFINAL),ZEVAPLIMMIX(JL)) +! ZEVAP=0.0_JPRB + ZLFINAL=ZLCUST(JL,JM)-ZEVAP + ZLFINALSUM(JL)=ZLFINALSUM(JL)+ZLFINAL ! sum + + ZSOLQA(JL,JM,JM) = ZSOLQA(JL,JM,JM)+ZLCUST(JL,JM) ! whole sum + ZSOLQA(JL,NCLDQV,JM) = ZSOLQA(JL,NCLDQV,JM)+ZEVAP + ZSOLQA(JL,JM,NCLDQV) = ZSOLQA(JL,JM,NCLDQV)-ZEVAP + ENDDO + ENDIF + ENDDO + + ! Reset the cloud contribution if no cloud water survives to this level: + DO JL=KIDIA,KFDIA + IF (ZLFINALSUM(JL)NCLDTOP + + !--------------------------------------------------------------------- + ! Subsidence sink of cloud to the layer below + ! (Implicit - re. CFL limit on convective mass flux) + !--------------------------------------------------------------------- + + DO JL=KIDIA,KFDIA + + IF(JK 0 .AND. PLUDE(JL,JK) > ZEPSEC)& + & ZLDIFDT(JL)=RCLDIFF_CONVI*ZLDIFDT(JL) + ENDDO + + ! At the moment, works on mixed RH profile and partitioned ice/liq fraction + ! so that it is similar to previous scheme + ! Should apply RHw for liquid cloud and RHi for ice cloud separately + DO JL=KIDIA,KFDIA + IF(ZLI(JL,JK) > ZEPSEC) THEN + ! Calculate environmental humidity +! ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& +! & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) +! ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB) + ZLEROS=ZA(JL,JK)*ZE + ZLEROS=MIN(ZLEROS,ZEVAPLIMMIX(JL)) + ZLEROS=MIN(ZLEROS,ZLI(JL,JK)) + ZAEROS=ZLEROS/ZLICLD(JL) !if linear term + + ! Erosion is -ve LINEAR in L,A + ZSOLAC(JL)=ZSOLAC(JL)-ZAEROS !linear + + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)+ZLIQFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)-ZLIQFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)+ZICEFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)-ZICEFRAC(JL,JK)*ZLEROS + + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + !---------------------------------------------------------------------- + ! calculate dqs/dt + ! Note: For the separate prognostic Qi and Ql, one would ideally use + ! Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + ! forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + ! These would then instantaneous freeze if T<-38C or lead to ice growth + ! by deposition in warmer mixed phase clouds. However, since we do + ! not have a separate prognostic equation for in-cloud humidity or a + ! statistical scheme approach in place, the depositional growth of ice + ! in the mixed phase can not be modelled and we resort to supersaturation + ! wrt ice instanteously converting to ice over one timestep + ! (see Tompkins et al. QJRMS 2007 for details) + ! Thus for the initial implementation the diagnostic mixed phase is + ! retained for the moment, and the level of approximation noted. + !---------------------------------------------------------------------- + + DO JL=KIDIA,KFDIA + ZDTDP = ZRDCP*ZTP1(JL,JK)/PAP(JL,JK) + ZDPMXDT = ZDP(JL)*ZQTMST + ZMFDN = 0.0_JPRB + IF(JK < KLEV) ZMFDN=PMFU(JL,JK+1)+PMFD(JL,JK+1) + ZWTOT = PVERVEL(JL,JK)+0.5_JPRB*RG*(PMFU(JL,JK)+PMFD(JL,JK)+ZMFDN) + ZWTOT = MIN(ZDPMXDT,MAX(-ZDPMXDT,ZWTOT)) + ZZZDT = PHRSW(JL,JK)+PHRLW(JL,JK) + ZDTDIAB = MIN(ZDPMXDT*ZDTDP,MAX(-ZDPMXDT*ZDTDP,ZZZDT))& + & *PTSPHY+RALFDCP*ZLDEFR(JL) +! Note: ZLDEFR should be set to the difference between the mixed phase functions +! in the convection and cloud scheme, but this is not calculated, so is zero and +! the functions must be the same + ZDTFORC = ZDTDP*ZWTOT*PTSPHY+ZDTDIAB + ZQOLD(JL) = ZQSMIX(JL,JK) + ZTOLD(JL) = ZTP1(JL,JK) + ZTP1(JL,JK) = ZTP1(JL,JK)+ZDTFORC + ZTP1(JL,JK) = MAX(ZTP1(JL,JK),160.0_JPRB) + LLFLAG(JL) = .TRUE. + ENDDO + + ! Formerly a call to CUADJTQ(..., ICALL=5) + DO JL=KIDIA,KFDIA + ZQP = 1.0_JPRB/PAP(JL,JK) + ZQSAT = FOEEWM(ZTP1(JL,JK))*ZQP + ZQSAT = MIN(0.5_JPRB,ZQSAT) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV *ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND = (ZQSMIX(JL,JK)-ZQSAT)/(1.0_JPRB+ZQSAT*ZCOR*FOEDEM(ZTP1(JL,JK))) + ZTP1(JL,JK) = ZTP1(JL,JK)+FOELDCPM(ZTP1(JL,JK))*ZCOND + ZQSMIX(JL,JK) = ZQSMIX(JL,JK)-ZCOND + ZQSAT = FOEEWM(ZTP1(JL,JK))*ZQP + ZQSAT = MIN(0.5_JPRB,ZQSAT) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV *ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND1= (ZQSMIX(JL,JK)-ZQSAT)/(1.0_JPRB+ZQSAT*ZCOR*FOEDEM(ZTP1(JL,JK))) + ZTP1(JL,JK) = ZTP1(JL,JK)+FOELDCPM(ZTP1(JL,JK))*ZCOND1 + ZQSMIX(JL,JK) = ZQSMIX(JL,JK)-ZCOND1 + ENDDO + + DO JL=KIDIA,KFDIA + ZDQS(JL) = ZQSMIX(JL,JK)-ZQOLD(JL) + ZQSMIX(JL,JK) = ZQOLD(JL) + ZTP1(JL,JK) = ZTOLD(JL) + ENDDO + + !---------------------------------------------------------------------- + ! 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + ! ---------------------------------------------------------------------- + ! Erosion term is LINEAR in L + ! Changed to be uniform distribution in cloud region + + DO JL=KIDIA,KFDIA + + ! Previous function based on DELTA DISTRIBUTION in cloud: + IF (ZDQS(JL) > 0.0_JPRB) THEN +! If subsidence evaporation term is turned off, then need to use updated +! liquid and cloud here? +! ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + ZLEVAP = ZA(JL,JK)*MIN(ZDQS(JL),ZLICLD(JL)) + ZLEVAP = MIN(ZLEVAP,ZEVAPLIMMIX(JL)) + ZLEVAP = MIN(ZLEVAP,MAX(ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB)) + + ! For first guess call + ZLEVAPL(JL) = ZLIQFRAC(JL,JK)*ZLEVAP + ZLEVAPI(JL) = ZICEFRAC(JL,JK)*ZLEVAP + + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)+ZLIQFRAC(JL,JK)*ZLEVAP + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)-ZLIQFRAC(JL,JK)*ZLEVAP + + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)+ZICEFRAC(JL,JK)*ZLEVAP + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)-ZICEFRAC(JL,JK)*ZLEVAP + + ENDIF + + ENDDO + + !---------------------------------------------------------------------- + ! 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + !---------------------------------------------------------------------- + ! (1) Increase of cloud water in existing clouds + DO JL=KIDIA,KFDIA + IF(ZA(JL,JK) > ZEPSEC.AND.ZDQS(JL) <= -RLMIN) THEN + + ZLCOND1(JL)=MAX(-ZDQS(JL),0.0_JPRB) !new limiter + +!old limiter (significantly improves upper tropospheric humidity rms) + IF(ZA(JL,JK) > 0.99_JPRB) THEN + ZCOR=1.0_JPRB/(1.0_JPRB-RETV*ZQSMIX(JL,JK)) + ZCDMAX=(ZQX(JL,JK,NCLDQV)-ZQSMIX(JL,JK))/& + & (1.0_JPRB+ZCOR*ZQSMIX(JL,JK)*FOEDEM(ZTP1(JL,JK))) + ELSE + ZCDMAX=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/ZA(JL,JK) + ENDIF + ZLCOND1(JL)=MAX(MIN(ZLCOND1(JL),ZCDMAX),0.0_JPRB) +! end old limiter + + ZLCOND1(JL)=ZA(JL,JK)*ZLCOND1(JL) + IF(ZLCOND1(JL) < RLMIN) ZLCOND1(JL)=0.0_JPRB + + !------------------------------------------------------------------------- + ! All increase goes into liquid unless so cold cloud homogeneously freezes + ! Include new liquid formation in first guess value, otherwise liquid + ! remains at cold temperatures until next timestep. + !------------------------------------------------------------------------- + IF (ZTP1(JL,JK)>RTHOMO) THEN + ZSOLQA(JL,NCLDQL,NCLDQV)=ZSOLQA(JL,NCLDQL,NCLDQV)+ZLCOND1(JL) + ZSOLQA(JL,NCLDQV,NCLDQL)=ZSOLQA(JL,NCLDQV,NCLDQL)-ZLCOND1(JL) + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZLCOND1(JL) + ELSE + ZSOLQA(JL,NCLDQI,NCLDQV)=ZSOLQA(JL,NCLDQI,NCLDQV)+ZLCOND1(JL) + ZSOLQA(JL,NCLDQV,NCLDQI)=ZSOLQA(JL,NCLDQV,NCLDQI)-ZLCOND1(JL) + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZLCOND1(JL) + ENDIF + ENDIF + ENDDO + + ! (2) Generation of new clouds (da/dt>0) + + DO JL=KIDIA,KFDIA + + IF(ZDQS(JL) <= -RLMIN .AND. ZA(JL,JK)<1.0_JPRB-ZEPSEC) THEN + + !--------------------------- + ! Critical relative humidity + !--------------------------- + ZRHC=RAMID + ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + ! Increase RHcrit to 1.0 towards the surface (eta>0.8) + IF(ZSIGK > 0.8_JPRB) THEN + ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + ENDIF + +! Commented out for CY37R1 to reduce humidity in high trop and strat +! ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above +! ZBOTT=ZTRPAUS(JL)+0.2_JPRB +! IF(ZSIGK < ZBOTT) THEN +! ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) +! ENDIF + + !--------------------------- + ! Supersaturation options + !--------------------------- + IF (NSSOPT==0) THEN + ! No scheme + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSICE(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZQE=MAX(0.0_JPRB,ZQE) + ELSEIF (NSSOPT==1) THEN + ! Tompkins + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSICE(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZQE=MAX(0.0_JPRB,ZQE) + ELSEIF (NSSOPT==2) THEN + ! Lohmann and Karcher + ZQE=ZQX(JL,JK,NCLDQV) + ELSEIF (NSSOPT==3) THEN + ! Gierens + ZQE=ZQX(JL,JK,NCLDQV)+ZLI(JL,JK) + ENDIF + + IF (ZTP1(JL,JK)>=RTT .OR. NSSOPT==0) THEN + ! No ice supersaturation allowed + ZFAC=1.0_JPRB + ELSE + ! Ice supersaturation + ZFAC=ZFOKOOP(JL) + ENDIF + + IF(ZQE >= ZRHC*ZQSICE(JL,JK)*ZFAC.AND.ZQERTHOMO) THEN + ZSOLQA(JL,NCLDQL,NCLDQV)=ZSOLQA(JL,NCLDQL,NCLDQV)+ZLCOND2(JL) + ZSOLQA(JL,NCLDQV,NCLDQL)=ZSOLQA(JL,NCLDQV,NCLDQL)-ZLCOND2(JL) + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZLCOND2(JL) + ELSE ! homogeneous freezing + ZSOLQA(JL,NCLDQI,NCLDQV)=ZSOLQA(JL,NCLDQI,NCLDQV)+ZLCOND2(JL) + ZSOLQA(JL,NCLDQV,NCLDQI)=ZSOLQA(JL,NCLDQV,NCLDQI)-ZLCOND2(JL) + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZLCOND2(JL) + ENDIF + + ENDIF + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 3.7 Growth of ice by vapour deposition + !---------------------------------------------------------------------- + ! Following Rotstayn et al. 2001: + ! does not use the ice nuclei number from cloudaer.F90 + ! but rather a simple Meyers et al. 1992 form based on the + ! supersaturation and assuming clouds are saturated with + ! respect to liquid water (well mixed), (or Koop adjustment) + ! Growth considered as sink of liquid water if present so + ! Bergeron-Findeisen adjustment in autoconversion term no longer needed + !---------------------------------------------------------------------- + + !-------------------------------------------------------- + !- + !- Ice deposition following Rotstayn et al. (2001) + !- (monodisperse ice particle size distribution) + !- + !-------------------------------------------------------- + IF (IDEPICE == 1) THEN + + DO JL=KIDIA,KFDIA + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JL,JK-1) < RCLDTOPCF .AND. ZA(JL,JK) >= RCLDTOPCF) THEN + ZCLDTOPDIST(JL) = 0.0_JPRB + ELSE + ZCLDTOPDIST(JL) = ZCLDTOPDIST(JL) + ZDP(JL)/(ZRHO(JL)*RG) + ENDIF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JL,JK)RLMIN) THEN ! T<273K + + ZVPICE=FOEEICE(ZTP1(JL,JK))*RV/RD + ZVPLIQ=ZVPICE*ZFOKOOP(JL) + ZICENUCLEI(JL)=1000.0_JPRB*EXP(12.96_JPRB*(ZVPLIQ-ZVPICE)/ZVPLIQ-0.639_JPRB) + + !------------------------------------------------ + ! 2.4e-2 is conductivity of air + ! 8.8 = 700**1/3 = density of ice to the third + !------------------------------------------------ + ZADD=RLSTT*(RLSTT/(RV*ZTP1(JL,JK))-1.0_JPRB)/(2.4E-2_JPRB*ZTP1(JL,JK)) + ZBDD=RV*ZTP1(JL,JK)*PAP(JL,JK)/(2.21_JPRB*ZVPICE) + ZCVDS=7.8_JPRB*(ZICENUCLEI(JL)/ZRHO(JL))**0.666_JPRB*(ZVPLIQ-ZVPICE) / & + & (8.87_JPRB*(ZADD+ZBDD)*ZVPICE) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0=MAX(ZICECLD(JL), ZICENUCLEI(JL)*RICEINIT/ZRHO(JL)) + + !------------------ + ! new value of ice: + !------------------ + ZINEW=(0.666_JPRB*ZCVDS*PTSPHY+ZICE0**0.666_JPRB)**1.5_JPRB + + !--------------------------- + ! grid-mean deposition rate: + !--------------------------- + ZDEPOS=MAX(ZA(JL,JK)*(ZINEW-ZICE0),0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS=MIN(ZDEPOS,ZQXFG(JL,NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- +! ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI(JL)/15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB-ZINFACTOR)* & + & (RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH),1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(JL,NCLDQI,NCLDQL)=ZSOLQA(JL,NCLDQI,NCLDQL)+ZDEPOS + ZSOLQA(JL,NCLDQL,NCLDQI)=ZSOLQA(JL,NCLDQL,NCLDQI)-ZDEPOS + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZDEPOS + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)-ZDEPOS + + ENDIF + ENDDO + + !-------------------------------------------------------- + !- + !- Ice deposition assuming ice PSD + !- + !-------------------------------------------------------- + ELSEIF (IDEPICE == 2) THEN + + DO JL=KIDIA,KFDIA + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JL,JK-1) < RCLDTOPCF .AND. ZA(JL,JK) >= RCLDTOPCF) THEN + ZCLDTOPDIST(JL) = 0.0_JPRB + ELSE + ZCLDTOPDIST(JL) = ZCLDTOPDIST(JL) + ZDP(JL)/(ZRHO(JL)*RG) + ENDIF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JL,JK)RLMIN) THEN ! T<273K + + ZVPICE = FOEEICE(ZTP1(JL,JK))*RV/RD + ZVPLIQ = ZVPICE*ZFOKOOP(JL) + ZICENUCLEI(JL)=1000.0_JPRB*EXP(12.96_JPRB*(ZVPLIQ-ZVPICE)/ZVPLIQ-0.639_JPRB) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0=MAX(ZICECLD(JL), ZICENUCLEI(JL)*RICEINIT/ZRHO(JL)) + + ! Particle size distribution + ZTCG = 1.0_JPRB + ZFACX1I = 1.0_JPRB + + ZAPLUSB = RCL_APB1*ZVPICE-RCL_APB2*ZVPICE*ZTP1(JL,JK)+ & + & PAP(JL,JK)*RCL_APB3*ZTP1(JL,JK)**3._JPRB + ZCORRFAC = (1.0_JPRB/ZRHO(JL))**0.5_JPRB + ZCORRFAC2 = ((ZTP1(JL,JK)/273.0_JPRB)**1.5_JPRB) & + & *(393.0_JPRB/(ZTP1(JL,JK)+120.0_JPRB)) + + ZPR02 = ZRHO(JL)*ZICE0*RCL_CONST1I/(ZTCG*ZFACX1I) + + ZTERM1 = (ZVPLIQ-ZVPICE)*ZTP1(JL,JK)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG* & + & RCL_CONST2I*ZFACX1I/(ZRHO(JL)*ZAPLUSB*ZVPICE) + ZTERM2 = 0.65_JPRB*RCL_CONST6I*ZPR02**RCL_CONST4I+RCL_CONST3I & + & *ZCORRFAC**0.5_JPRB*ZRHO(JL)**0.5_JPRB & + & *ZPR02**RCL_CONST5I/ZCORRFAC2**0.5_JPRB + + ZDEPOS = MAX(ZA(JL,JK)*ZTERM1*ZTERM2*PTSPHY,0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS=MIN(ZDEPOS,ZQXFG(JL,NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI(JL)/15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB-ZINFACTOR)* & + & (RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH),1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(JL,NCLDQI,NCLDQL) = ZSOLQA(JL,NCLDQI,NCLDQL)+ZDEPOS + ZSOLQA(JL,NCLDQL,NCLDQI) = ZSOLQA(JL,NCLDQL,NCLDQI)-ZDEPOS + ZQXFG(JL,NCLDQI) = ZQXFG(JL,NCLDQI)+ZDEPOS + ZQXFG(JL,NCLDQL) = ZQXFG(JL,NCLDQL)-ZDEPOS + ENDIF + ENDDO + + ENDIF ! on IDEPICE + + !###################################################################### + ! 4 *** PRECIPITATION PROCESSES *** + !###################################################################### + + !---------------------------------- + ! revise in-cloud consensate amount + !---------------------------------- + DO JL=KIDIA,KFDIA + ZTMPA = 1.0_JPRB/MAX(ZA(JL,JK),ZEPSEC) + ZLIQCLD(JL) = ZQXFG(JL,NCLDQL)*ZTMPA + ZICECLD(JL) = ZQXFG(JL,NCLDQI)*ZTMPA + ZLICLD(JL) = ZLIQCLD(JL)+ZICECLD(JL) + ENDDO + + !---------------------------------------------------------------------- + ! 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + ! now that rain, snow, graupel species are prognostic + ! the precipitation flux can be defined directly level by level + ! There is no vertical memory required from the flux variable + !---------------------------------------------------------------------- + + DO JM = 1,NCLV + IF (LLFALL(JM) .OR. JM == NCLDQI) THEN + DO JL=KIDIA,KFDIA + !------------------------ + ! source from layer above + !------------------------ + IF (JK > NCLDTOP) THEN + ZFALLSRCE(JL,JM) = ZPFPLSX(JL,JK,JM)*ZDTGDP(JL) + ZSOLQA(JL,JM,JM) = ZSOLQA(JL,JM,JM)+ZFALLSRCE(JL,JM) + ZQXFG(JL,JM) = ZQXFG(JL,JM)+ZFALLSRCE(JL,JM) + ! use first guess precip----------V + ZQPRETOT(JL) = ZQPRETOT(JL)+ZQXFG(JL,JM) + ENDIF + !------------------------------------------------- + ! sink to next layer, constant fall speed + !------------------------------------------------- + ! if aerosol effect then override + ! note that for T>233K this is the same as above. + IF (LAERICESED .AND. JM == NCLDQI) THEN + ZRE_ICE=PRE_ICE(JL,JK) + ! The exponent value is from + ! Morrison et al. JAS 2005 Appendix + ZVQX(NCLDQI) = 0.002_JPRB*ZRE_ICE**1.0_JPRB + ENDIF + ZFALL=ZVQX(JM)*ZRHO(JL) + !------------------------------------------------- + ! modified by Heymsfield and Iaquinta JAS 2000 + !------------------------------------------------- + ! ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + ! &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + ZFALLSINK(JL,JM)=ZDTGDP(JL)*ZFALL + ! Cloud budget diagnostic stored at end as implicit + ENDDO ! jl + ENDIF ! LLFALL + ENDDO ! jm + + !--------------------------------------------------------------- + ! Precip cover overlap using MAX-RAN Overlap + ! Since precipitation is now prognostic we must + ! 1) apply an arbitrary minimum coverage (0.3) if precip>0 + ! 2) abandon the 2-flux clr/cld treatment + ! 3) Thus, since we have no memory of the clear sky precip + ! fraction, we mimic the previous method by reducing + ! ZCOVPTOT(JL), which has the memory, proportionally with + ! the precip evaporation rate, taking cloud fraction + ! into account + ! #3 above leads to much smoother vertical profiles of + ! precipitation fraction than the Klein-Jakob scheme which + ! monotonically increases precip fraction and then resets + ! it to zero in a step function once clear-sky precip reaches + ! zero. + !--------------------------------------------------------------- + DO JL=KIDIA,KFDIA + IF (ZQPRETOT(JL)>ZEPSEC) THEN + ZCOVPTOT(JL) = 1.0_JPRB - ((1.0_JPRB-ZCOVPTOT(JL))*& + & (1.0_JPRB - MAX(ZA(JL,JK),ZA(JL,JK-1)))/& + & (1.0_JPRB - MIN(ZA(JL,JK-1),1.0_JPRB-1.E-06_JPRB)) ) + ZCOVPTOT(JL) = MAX(ZCOVPTOT(JL),RCOVPMIN) + ZCOVPCLR(JL) = MAX(0.0_JPRB,ZCOVPTOT(JL)-ZA(JL,JK)) ! clear sky proportion + ZRAINCLD(JL) = ZQXFG(JL,NCLDQR)/ZCOVPTOT(JL) + ZSNOWCLD(JL) = ZQXFG(JL,NCLDQS)/ZCOVPTOT(JL) + ZCOVPMAX(JL) = MAX(ZCOVPTOT(JL),ZCOVPMAX(JL)) + ELSE + ZRAINCLD(JL) = 0.0_JPRB + ZSNOWCLD(JL) = 0.0_JPRB + ZCOVPTOT(JL) = 0.0_JPRB ! no flux - reset cover + ZCOVPCLR(JL) = 0.0_JPRB ! reset clear sky proportion + ZCOVPMAX(JL) = 0.0_JPRB ! reset max cover for ZZRH calc + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.3a AUTOCONVERSION TO SNOW + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + IF(ZTP1(JL,JK) <= RTT) THEN + !----------------------------------------------------- + ! Snow Autoconversion rate follow Lin et al. 1983 + !----------------------------------------------------- + IF (ZICECLD(JL)>ZEPSEC) THEN + + ZZCO=PTSPHY*RSNOWLIN1*EXP(RSNOWLIN2*(ZTP1(JL,JK)-RTT)) + + IF (LAERICEAUTO) THEN + ZLCRIT=PICRIT_AER(JL,JK) + ! 0.3 = N**0.333 with N=0.027 + ZZCO=ZZCO*(RNICE/PNICE(JL,JK))**0.333_JPRB + ELSE + ZLCRIT=RLCRITSNOW + ENDIF + + ZSNOWAUT(JL)=ZZCO*(1.0_JPRB-EXP(-(ZICECLD(JL)/ZLCRIT)**2)) + ZSOLQB(JL,NCLDQS,NCLDQI)=ZSOLQB(JL,NCLDQS,NCLDQI)+ZSNOWAUT(JL) + + ENDIF + ENDIF + + !---------------------------------------------------------------------- + ! 4.3b AUTOCONVERSION WARM CLOUDS + ! Collection and accretion will require separate treatment + ! but for now we keep this simple treatment + !---------------------------------------------------------------------- + + IF (ZLIQCLD(JL)>ZEPSEC) THEN + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Sundqvist (1989) + !- + !-------------------------------------------------------- + IF (IWARMRAIN == 1) THEN + + ZZCO=RKCONV*PTSPHY + + IF (LAERLIQAUTOLSP) THEN + ZLCRIT=PLCRIT_AER(JL,JK) + ! 0.3 = N**0.333 with N=125 cm-3 + ZZCO=ZZCO*(RCCN/PCCN(JL,JK))**0.333_JPRB + ELSE + ! Modify autoconversion threshold dependent on: + ! land (polluted, high CCN, smaller droplets, higher threshold) + ! sea (clean, low CCN, larger droplets, lower threshold) + IF (PLSM(JL) > 0.5_JPRB) THEN + ZLCRIT = RCLCRIT_LAND ! land + ELSE + ZLCRIT = RCLCRIT_SEA ! ocean + ENDIF + ENDIF + + !------------------------------------------------------------------ + ! Parameters for cloud collection by rain and snow. + ! Note that with new prognostic variable it is now possible + ! to REPLACE this with an explicit collection parametrization + !------------------------------------------------------------------ + ZPRECIP=(ZPFPLSX(JL,JK,NCLDQS)+ZPFPLSX(JL,JK,NCLDQR))/MAX(ZEPSEC,ZCOVPTOT(JL)) + ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB)) +! ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& +! &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + IF (LAERLIQCOLL) THEN + ! 5.0 = N**0.333 with N=125 cm-3 + ZCFPR=ZCFPR*(RCCN/PCCN(JL,JK))**0.333_JPRB + ENDIF + + ZZCO=ZZCO*ZCFPR + ZLCRIT=ZLCRIT/MAX(ZCFPR,ZEPSEC) + + IF(ZLIQCLD(JL)/ZLCRIT < 20.0_JPRB )THEN ! Security for exp for some compilers + ZRAINAUT(JL)=ZZCO*(1.0_JPRB-EXP(-(ZLIQCLD(JL)/ZLCRIT)**2)) + ELSE + ZRAINAUT(JL)=ZZCO + ENDIF + + ! rain freezes instantly + IF(ZTP1(JL,JK) <= RTT) THEN + ZSOLQB(JL,NCLDQS,NCLDQL)=ZSOLQB(JL,NCLDQS,NCLDQL)+ZRAINAUT(JL) + ELSE + ZSOLQB(JL,NCLDQR,NCLDQL)=ZSOLQB(JL,NCLDQR,NCLDQL)+ZRAINAUT(JL) + ENDIF + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Khairoutdinov and Kogan (2000) + !- + !-------------------------------------------------------- + ELSEIF (IWARMRAIN == 2) THEN + + IF (PLSM(JL) > 0.5_JPRB) THEN ! land + ZCONST = RCL_KK_CLOUD_NUM_LAND + ZLCRIT = RCLCRIT_LAND + ELSE ! ocean + ZCONST = RCL_KK_CLOUD_NUM_SEA + ZLCRIT = RCLCRIT_SEA + ENDIF + + IF (ZLIQCLD(JL) > ZLCRIT) THEN + + ZRAINAUT(JL) = 1.5_JPRB*ZA(JL,JK)*PTSPHY* & + & RCL_KKAau * ZLIQCLD(JL)**RCL_KKBauq * ZCONST**RCL_KKBaun + + ZRAINAUT(JL) = MIN(ZRAINAUT(JL),ZQXFG(JL,NCLDQL)) + IF (ZRAINAUT(JL) < ZEPSEC) ZRAINAUT(JL) = 0.0_JPRB + + ZRAINACC(JL) = 2.0_JPRB*ZA(JL,JK)*PTSPHY* & + & RCL_KKAac * (ZLIQCLD(JL)*ZRAINCLD(JL))**RCL_KKBac + + ZRAINACC(JL) = MIN(ZRAINACC(JL),ZQXFG(JL,NCLDQL)) + IF (ZRAINACC(JL) < ZEPSEC) ZRAINACC(JL) = 0.0_JPRB + + ELSE + ZRAINAUT(JL) = 0.0_JPRB + ZRAINACC(JL) = 0.0_JPRB + ENDIF + + ! If temperature < 0, then autoconversion produces snow rather than rain + ! Explicit + IF(ZTP1(JL,JK) <= RTT) THEN + ZSOLQA(JL,NCLDQS,NCLDQL)=ZSOLQA(JL,NCLDQS,NCLDQL)+ZRAINAUT(JL) + ZSOLQA(JL,NCLDQS,NCLDQL)=ZSOLQA(JL,NCLDQS,NCLDQL)+ZRAINACC(JL) + ZSOLQA(JL,NCLDQL,NCLDQS)=ZSOLQA(JL,NCLDQL,NCLDQS)-ZRAINAUT(JL) + ZSOLQA(JL,NCLDQL,NCLDQS)=ZSOLQA(JL,NCLDQL,NCLDQS)-ZRAINACC(JL) + ELSE + ZSOLQA(JL,NCLDQR,NCLDQL)=ZSOLQA(JL,NCLDQR,NCLDQL)+ZRAINAUT(JL) + ZSOLQA(JL,NCLDQR,NCLDQL)=ZSOLQA(JL,NCLDQR,NCLDQL)+ZRAINACC(JL) + ZSOLQA(JL,NCLDQL,NCLDQR)=ZSOLQA(JL,NCLDQL,NCLDQR)-ZRAINAUT(JL) + ZSOLQA(JL,NCLDQL,NCLDQR)=ZSOLQA(JL,NCLDQL,NCLDQR)-ZRAINACC(JL) + ENDIF + + ENDIF ! on IWARMRAIN + + ENDIF ! on ZLIQCLD > ZEPSEC + ENDDO + + + !---------------------------------------------------------------------- + ! RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + ! only active if T<0degC and supercooled liquid water is present + ! AND if not Sundquist autoconversion (as this includes riming) + !---------------------------------------------------------------------- + IF (IWARMRAIN > 1) THEN + + DO JL=KIDIA,KFDIA + IF(ZTP1(JL,JK) <= RTT .AND. ZLIQCLD(JL)>ZEPSEC) THEN + + ! Fallspeed air density correction + ZFALLCORR = (RDENSREF/ZRHO(JL))**0.4_JPRB + + !------------------------------------------------------------------ + ! Riming of snow by cloud water - implicit in lwc + !------------------------------------------------------------------ + IF (ZSNOWCLD(JL)>ZEPSEC .AND. ZCOVPTOT(JL)>0.01_JPRB) THEN + + ! Calculate riming term + ! Factor of liq water taken out because implicit + ZSNOWRIME(JL) = 0.3_JPRB*ZCOVPTOT(JL)*PTSPHY*RCL_CONST7S*ZFALLCORR & + & *(ZRHO(JL)*ZSNOWCLD(JL)*RCL_CONST1S)**RCL_CONST8S + + ! Limit snow riming term + ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + + ZSOLQB(JL,NCLDQS,NCLDQL) = ZSOLQB(JL,NCLDQS,NCLDQL) + ZSNOWRIME(JL) + + ENDIF + + !------------------------------------------------------------------ + ! Riming of ice by cloud water - implicit in lwc + ! NOT YET ACTIVE + !------------------------------------------------------------------ +! IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN +! +! ! Calculate riming term +! ! Factor of liq water taken out because implicit +! ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & +! & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S +! +! ! Limit ice riming term +! ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) +! +! ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) +! +! ENDIF + ENDIF + ENDDO + + ENDIF ! on IWARMRAIN > 1 + + + !---------------------------------------------------------------------- + ! 4.4a MELTING OF SNOW and ICE + ! with new implicit solver this also has to treat snow or ice + ! precipitating from the level above... i.e. local ice AND flux. + ! in situ ice and snow: could arise from LS advection or warming + ! falling ice and snow: arrives by precipitation process + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + ZICETOT(JL)=ZQXFG(JL,NCLDQI)+ZQXFG(JL,NCLDQS) + ZMELTMAX(JL) = 0.0_JPRB + + ! If there are frozen hydrometeors present and dry-bulb temperature > 0degC + IF(ZICETOT(JL) > ZEPSEC .AND. ZTP1(JL,JK) > RTT) THEN + + ! Calculate subsaturation + ZSUBSAT = MAX(ZQSICE(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB) + + ! Calculate difference between dry-bulb (ZTP1) and the temperature + ! at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + ! Melting only occurs if the wet-bulb temperature >0 + ! i.e. warming of ice particle due to melting > cooling + ! due to evaporation. + ZTDMTW0 = ZTP1(JL,JK)-RTT-ZSUBSAT* & + & (ZTW1+ZTW2*(PAP(JL,JK)-ZTW3)-ZTW4*(ZTP1(JL,JK)-ZTW5)) + ! Not implicit yet... + ! Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + ZCONS1 = ABS(PTSPHY*(1.0_JPRB+0.5_JPRB*ZTDMTW0)/RTAUMEL) + ZMELTMAX(JL) = MAX(ZTDMTW0*ZCONS1*ZRLDCP,0.0_JPRB) + ENDIF + ENDDO + + ! Loop over frozen hydrometeors (ice, snow) + DO JM=1,NCLV + IF (IPHASE(JM) == 2) THEN + JN = IMELT(JM) + DO JL=KIDIA,KFDIA + IF(ZMELTMAX(JL)>ZEPSEC .AND. ZICETOT(JL)>ZEPSEC) THEN + ! Apply melting in same proportion as frozen hydrometeor fractions + ZALFA = ZQXFG(JL,JM)/ZICETOT(JL) + ZMELT = MIN(ZQXFG(JL,JM),ZALFA*ZMELTMAX(JL)) + ! needed in first guess + ! This implies that zqpretot has to be recalculated below + ! since is not conserved here if ice falls and liquid doesn't + ZQXFG(JL,JM) = ZQXFG(JL,JM)-ZMELT + ZQXFG(JL,JN) = ZQXFG(JL,JN)+ZMELT + ZSOLQA(JL,JN,JM) = ZSOLQA(JL,JN,JM)+ZMELT + ZSOLQA(JL,JM,JN) = ZSOLQA(JL,JM,JN)-ZMELT + ENDIF + ENDDO + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.4b FREEZING of RAIN + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + ! If rain present + IF (ZQX(JL,JK,NCLDQR) > ZEPSEC) THEN + + IF (ZTP1(JL,JK) <= RTT .AND. ZTP1(JL,JK-1) > RTT) THEN + ! Base of melting layer/top of refreezing layer so + ! store rain/snow fraction for precip type diagnosis + ! If mostly rain, then supercooled rain slow to freeze + ! otherwise faster to freeze (snow or ice pellets) + ZQPRETOT(JL) = MAX(ZQX(JL,JK,NCLDQS)+ZQX(JL,JK,NCLDQR),ZEPSEC) + PRAINFRAC_TOPRFZ(JL) = ZQX(JL,JK,NCLDQR)/ZQPRETOT(JL) + IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN + LLRAINLIQ(JL) = .True. + ELSE + LLRAINLIQ(JL) = .False. + ENDIF + ENDIF + + ! If temperature less than zero + IF (ZTP1(JL,JK) < RTT) THEN + + IF (LLRAINLIQ(JL)) THEN + + ! Majority of raindrops completely melted + ! Refreezing is by slow heterogeneous freezing + + ! Slope of rain particle size distribution + ZLAMBDA = (RCL_FAC1/(ZRHO(JL)*ZQX(JL,JK,NCLDQR)))**RCL_FAC2 + + ! Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ZTEMP = RCL_FZRAB * (ZTP1(JL,JK)-RTT) + ZFRZ = PTSPHY * (RCL_CONST5R/ZRHO(JL)) * (EXP(ZTEMP)-1._JPRB) & + & * ZLAMBDA**RCL_CONST6R + ZFRZMAX(JL) = MAX(ZFRZ,0.0_JPRB) + + ELSE + + ! Majority of raindrops only partially melted + ! Refreeze with a shorter timescale (reverse of melting...for now) + + ZCONS1 = ABS(PTSPHY*(1.0_JPRB+0.5_JPRB*(RTT-ZTP1(JL,JK)))/RTAUMEL) + ZFRZMAX(JL) = MAX((RTT-ZTP1(JL,JK))*ZCONS1*ZRLDCP,0.0_JPRB) + + ENDIF + + IF(ZFRZMAX(JL)>ZEPSEC) THEN + ZFRZ = MIN(ZQX(JL,JK,NCLDQR),ZFRZMAX(JL)) + ZSOLQA(JL,NCLDQS,NCLDQR) = ZSOLQA(JL,NCLDQS,NCLDQR)+ZFRZ + ZSOLQA(JL,NCLDQR,NCLDQS) = ZSOLQA(JL,NCLDQR,NCLDQS)-ZFRZ + ENDIF + ENDIF + + ENDIF + + ENDDO + + !---------------------------------------------------------------------- + ! 4.4c FREEZING of LIQUID + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + ! not implicit yet... + ZFRZMAX(JL)=MAX((RTHOMO-ZTP1(JL,JK))*ZRLDCP,0.0_JPRB) + ENDDO + + JM = NCLDQL + JN = IMELT(JM) + DO JL=KIDIA,KFDIA + IF(ZFRZMAX(JL)>ZEPSEC .AND. ZQXFG(JL,JM)>ZEPSEC) THEN + ZFRZ = MIN(ZQXFG(JL,JM),ZFRZMAX(JL)) + ZSOLQA(JL,JN,JM) = ZSOLQA(JL,JN,JM)+ZFRZ + ZSOLQA(JL,JM,JN) = ZSOLQA(JL,JM,JN)-ZFRZ + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.5 EVAPORATION OF RAIN/SNOW + !---------------------------------------------------------------------- + + !---------------------------------------- + ! Rain evaporation scheme from Sundquist + !---------------------------------------- + IF (IEVAPRAIN == 1) THEN + + ! Rain + + DO JL=KIDIA,KFDIA + + ZZRH=RPRECRHMAX+(1.0_JPRB-RPRECRHMAX)*ZCOVPMAX(JL)/MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZZRH=MIN(MAX(ZZRH,RPRECRHMAX),1.0_JPRB) + + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSLIQ(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + !--------------------------------------------- + ! humidity in moistest ZCOVPCLR part of domain + !--------------------------------------------- + ZQE=MAX(0.0_JPRB,MIN(ZQE,ZQSLIQ(JL,JK))) + LLO1=ZCOVPCLR(JL)>ZEPSEC .AND. & + & ZQXFG(JL,NCLDQR)>ZEPSEC .AND. & + & ZQE0.8) + !IF(ZSIGK > 0.8_JPRB) THEN + ! ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + !ENDIF + !ZZRH = MIN(ZRHC,ZZRH) + + ! Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + ZZRH = MIN(0.8_JPRB,ZZRH) + + ZQE=MAX(0.0_JPRB,MIN(ZQX(JL,JK,NCLDQV),ZQSLIQ(JL,JK))) + + LLO1=ZCOVPCLR(JL)>ZEPSEC .AND. & + & ZQXFG(JL,NCLDQR)>ZEPSEC .AND. & + & ZQEZEPSEC .AND. & + & ZQXFG(JL,NCLDQS)>ZEPSEC .AND. & + & ZQEZEPSEC .AND. & + & ZQX(JL,JK,NCLDQS)>ZEPSEC .AND. & + & ZQE= 1) then + CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NUMOMP +end if + +! Initialize MPI environment +CALL CLOUDSC_MPI_INIT(NUMOMP) + +! Get total number of grid points (NGPTOT) with which to run the benchmark +IF (IARGS >= 2) THEN + CALL GET_COMMAND_ARGUMENT(2, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NGPTOTG +END IF + +! Determine local number of grid points +NGPTOT = (NGPTOTG - 1) / NUMPROC + 1 +if (IRANK == NUMPROC - 1) then + NGPTOT = NGPTOTG - (NUMPROC - 1) * NGPTOT +end if + +! Get the block size (NPROMA) for which to run the benchmark +IF (IARGS >= 3) THEN + CALL GET_COMMAND_ARGUMENT(3, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NPROMA +ENDIF + +! TODO: Create a global global memory state from serialized input data +CALL GLOBAL_STATE%LOAD(NPROMA, NGPTOT, NGPTOTG) +IF(.NOT.ALLOCATED(YRCST)) STOP 'YRCST not allocated' +IF(.NOT.ALLOCATED(YRTHF)) STOP 'YRTHF not allocated' +IF(.NOT.ALLOCATED(YRECLDP)) STOP 'YRECLDP not allocated' +! Call the driver to perform the parallel loop over our kernel +CALL CLOUDSC_DRIVER( NUMOMP, NPROMA, GLOBAL_STATE%KLEV, NGPTOT, NGPTOTG, & + & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, & + & GLOBAL_STATE%PT, GLOBAL_STATE%PQ, & +! & GLOBAL_STATE%TENDENCY_CML, GLOBAL_STATE%TENDENCY_TMP, GLOBAL_STATE%TENDENCY_LOC, & + & GLOBAL_STATE%B_TMP, GLOBAL_STATE%B_LOC, & + & GLOBAL_STATE%PVFA, GLOBAL_STATE%PVFL, GLOBAL_STATE%PVFI, & + & GLOBAL_STATE%PDYNA, GLOBAL_STATE%PDYNL, GLOBAL_STATE%PDYNI, & + & GLOBAL_STATE%PHRSW, GLOBAL_STATE%PHRLW, & + & GLOBAL_STATE%PVERVEL, GLOBAL_STATE%PAP, GLOBAL_STATE%PAPH, & + & GLOBAL_STATE%PLSM, GLOBAL_STATE%LDCUM, GLOBAL_STATE%KTYPE, & + & GLOBAL_STATE%PLU, GLOBAL_STATE%PLUDE, GLOBAL_STATE%PSNDE, & + & GLOBAL_STATE%PMFU, GLOBAL_STATE%PMFD, & + & GLOBAL_STATE%PA, GLOBAL_STATE%PCLV, GLOBAL_STATE%PSUPSAT,& + & GLOBAL_STATE%PLCRIT_AER, GLOBAL_STATE%PICRIT_AER, GLOBAL_STATE%PRE_ICE, & + & GLOBAL_STATE%PCCN, GLOBAL_STATE%PNICE,& + & GLOBAL_STATE%PCOVPTOT, GLOBAL_STATE%PRAINFRAC_TOPRFZ, & + & GLOBAL_STATE%PFSQLF, GLOBAL_STATE%PFSQIF , GLOBAL_STATE%PFCQNNG, GLOBAL_STATE%PFCQLNG, & + & GLOBAL_STATE%PFSQRF, GLOBAL_STATE%PFSQSF , GLOBAL_STATE%PFCQRNG, GLOBAL_STATE%PFCQSNG, & + & GLOBAL_STATE%PFSQLTUR, GLOBAL_STATE%PFSQITUR, & + & GLOBAL_STATE%PFPLSL, GLOBAL_STATE%PFPLSN, GLOBAL_STATE%PFHPSL, GLOBAL_STATE%PFHPSN, & + & YRCST, YRTHF, YRECLDP) + +! Validate the output against serialized reference data +CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG) + +! Tear down MPI environment +CALL CLOUDSC_MPI_END() + +END PROGRAM DWARF_CLOUDSC diff --git a/src/cloudsc_pyiface/kind_map b/src/cloudsc_pyiface/kind_map new file mode 100755 index 00000000..6325f33f --- /dev/null +++ b/src/cloudsc_pyiface/kind_map @@ -0,0 +1,18 @@ +{ + 'real': { '' : 'float', + '4' : 'float', + 'isp' : 'float', + '8' : 'double', + 'dp' : 'double', + 'jprb' : 'double'}, + 'complex' : { '' : 'complex_float', + '8' : 'complex_double', + '16' : 'complex_long_double', + 'dp' : 'complex_double'}, + 'integer' : { '4' : 'int', + '8' : 'long_long', + 'jpim' : 'int', + 'dp' : 'long_long' }, + 'character' : {'' : 'char', + '1' : 'char' } +} diff --git a/src/cloudsc_pyiface/pyproject.toml b/src/cloudsc_pyiface/pyproject.toml new file mode 100644 index 00000000..eb8a9e95 --- /dev/null +++ b/src/cloudsc_pyiface/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["setuptools >= 64"] +build-backend = "setuptools.build_meta" + +[project] +name = "pyiface" +version = "0.1.0" +authors = [ + {name = "Zbigniew Piotrowski", email = "zbigniew.piotrowski@ecmwf.int"}, + {name = "Michael Lange", email = "michael.lange@ecmwf.int"} +] +description = "Python driver that enables execution of the Fortran CLOUDSC dwarf from Python" +readme = "README.md" +requires-python = ">=3.8" +license = {file = "LICENSE"} +classifiers = [ + " Development Status :: 3 - Alpha ", + " Intended Audience:: Science / Research ", + " License :: OSI Approved:: Apache License, Version 2.0 ", + " Natural Language :: English ", + " Operating System :: POSIX ", + " Programming Language :: Python :: 3.8 ", + " Programming Language :: Python :: 3.9 ", + " Programming Language :: Python :: 3.10 ", + " Programming Language :: Python :: 3.11 ", + " Topic :: Scientific/Engineering :: Atmospheric Science " +] +dependencies = [ + "numpy", + "f90wrap", + "click", + "h5py", +] + +[project.scripts] +"cloudsc_pyiface.py" = "drivers.cloudsc_pyiface:main" + +[project.urls] +repository = "https://github.com/ecmwf-ifs/dwarf-p-cloudsc" + +[tool.setuptools.packages.find] +where = ["src", "."] +include = ["pyiface*", "drivers*"] diff --git a/src/cloudsc_pyiface/src/pyiface/__init__.py b/src/cloudsc_pyiface/src/pyiface/__init__.py new file mode 100644 index 00000000..119361b2 --- /dev/null +++ b/src/cloudsc_pyiface/src/pyiface/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- + +# (C) Copyright 2018- ECMWF. + +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +from pyiface.cloudsc_data import * # noqa +from pyiface.dynload import * # noqa diff --git a/src/cloudsc_pyiface/src/pyiface/cloudsc_data.py b/src/cloudsc_pyiface/src/pyiface/cloudsc_data.py new file mode 100644 index 00000000..df8dce40 --- /dev/null +++ b/src/cloudsc_pyiface/src/pyiface/cloudsc_data.py @@ -0,0 +1,442 @@ +""" +cloudsc_data module consist of utilities that: +- load variables serving as an input to a Fortran computational kernel; +- load physical parameters needed by a Fortran kernel; +- load reference results that will be compared with an output of Fortran computation; +- validates reference vs. computed fields; +- other, purely technical utilities. +""" +from collections import OrderedDict +import h5py +import numpy as np + +NCLV = 5 # number of microphysics variables + + +def define_fortran_fields(nproma, nlev, nblocks, clsc): + """ + define_fortran_fields returns: + - zero NumPy arrays that will further be used as an output of Fortran kernel computation. + - empty Fortran paramter datatypes that are created used constructors supplied by f90wrap. + """ + + fields = OrderedDict() + + argnames_nlev = [ + 'pcovptot' + ] + + argnames_nlevp = [ + 'pfplsl', 'pfplsn', 'pfhpsl', 'pfhpsn', + 'pfsqlf', 'pfsqif' , 'pfcqnng', 'pfcqlng', + 'pfsqrf', 'pfsqsf' , 'pfcqrng', 'pfcqsng', + 'pfsqltur', 'pfsqitur' + ] + + argnames_buffer = [ + 'buffer_loc','buffer_tmp' + ] + + argnames_tend = [ + 'tendency_loc_a','tendency_loc_t','tendency_loc_q', + ] + + argnames_tend_cld = [ + 'tendency_loc_cld' + ] + + argnames_nproma = [ + 'prainfrac_toprfz' + ] + + for argname in argnames_nlev: + fields[argname] = np.zeros(shape=(nproma,nlev ,nblocks), order='F') + + for argname in argnames_nlevp: + fields[argname] = np.zeros(shape=(nproma,nlev+1,nblocks), order='F') + + for argname in argnames_buffer: + fields[argname] = np.zeros(shape=(nproma,nlev,3+NCLV,nblocks), order='F') + + for argname in argnames_tend: + fields[argname] = np.zeros(shape=(nproma,nlev,nblocks), order='F') + + for argname in argnames_tend_cld: + fields[argname] = np.zeros(shape=(nproma,nlev,NCLV,nblocks), order='F') + + + for argname in argnames_nproma: + fields[argname] = np.zeros(shape=(nproma,nblocks), order='F') + + fields['ydomcst']=clsc.yomcst.TOMCST() + fields['ydoethf']=clsc.yoethf.TOETHF() + fields['ydecldp']=clsc.yoecldp.TECLDP() + fields['ydephli']=clsc.yoephli.TEPHLI() + + return fields + + +def field_c_to_fortran(dims, cfield, clsc=None, **kwargs): + """ + field_c_to_fortran: + 1) transposes C array input to Fortran array + 2) rewrites Fortran linear array into block structure + """ + + # Transpose the C array (row-major) into Fortran (column-major) data layout + ffieldtmp = np.asfortranarray(np.transpose(np.ascontiguousarray(cfield))) + + return field_linear_to_block(dims, ffieldtmp, clsc=clsc, **kwargs) + + +def field_linear_to_block(dims, lfield, clsc=None, **kwargs): + """ + Rewrites Fortran linear array into block structure + """ + + if not clsc: + raise RuntimeError('[PyIface] Cannot expand field without CLOUDSC Fortran backend') + + # Pick array dimension arguments from keyword args + nproma = kwargs.get('nproma', 32) + nblocks = kwargs.get('nblocks', 1) + ngptot = kwargs.get('ngptot', 100) + nlon = kwargs.get('nlon', 100) + ndim = kwargs.get('ndim', 1) + nlev = dims[-2] #nparms['NLEV'] + ldims = len(dims) + + if lfield.dtype == "float64": + if ldims == 2: + b2field=np.asfortranarray(np.transpose( + np.zeros(shape=dims, dtype="float64"))) + clsc.expand_mod.expand_r1(lfield, b2field, nlon, nproma, ngptot, nblocks) + bfield=b2field + elif ldims == 3: + b3field=np.asfortranarray(np.transpose(np.zeros(shape=dims, dtype="float64"))) + clsc.expand_mod.expand_r2(lfield, b3field, nlon, nproma, nlev, ngptot, nblocks) + bfield=b3field + elif ldims == 4: + b4field=np.asfortranarray(np.transpose(np.zeros(shape=dims, dtype="float64"))) + clsc.expand_mod.expand_r3(lfield, b4field, nlon=nlon, nproma=nproma, nlev=nlev, + ndim=ndim, ngptot=ngptot, nblocks=nblocks) + bfield=b4field + else: + print ("Wrong float ldim") + elif lfield.dtype == "bool": + # Workaround - using type int32, otherwise complains about type disagreement at runtime + bfield=np.asfortranarray(np.transpose(np.zeros(shape=dims, dtype='int32'))) + if ldims == 2: + tlfield=lfield.astype('int32') + clsc.expand_mod.expand_l1(tlfield, bfield, nlon, nproma, ngptot, nblocks) + else: + print ("Wrong bool ldim") + elif lfield.dtype == "int32": + bfield=np.asfortranarray(np.transpose(np.zeros(shape=dims, dtype='int32'))) + if ldims == 2: + clsc.expand_mod.expand_i1(lfield, bfield, nlon, nproma, ngptot, nblocks) + else: + print ("Wrong int ldim") + else: + print ("Wrong dtype") + return bfield + +def load_input_fortran_fields(path, fields, clsc=None, **kwargs): + """ + load_input_fortran_fields returns: + - set of variables needed to initiate computation of the Fortran kernel. + """ + + if not clsc: + raise RuntimeError('[PyIface] Cannot load input fields without CLOUDSC Fortran backend') + + nproma = kwargs['nproma'] + nlev = kwargs['nlev'] + nblocks = kwargs['nblocks'] + argnames_nlev = [ + 'pt', 'pq', + 'pvfa', 'pvfl', 'pvfi', 'pdyna', 'pdynl', 'pdyni', + 'phrsw', 'phrlw','pvervel','pap','plu','plude', + 'psnde', 'pmfu', 'pmfd', + 'pa', 'psupsat', + 'plcrit_aer','picrit_aer','pre_ice', + 'pccn', 'pnice' + ] + argnames_nlevp = [ + 'paph' + ] + + argnames_withnclv= [ + 'pclv','tendency_tmp_cld' + ] + + argnames_tend = [ + 'tendency_tmp_t','tendency_tmp_q','tendency_tmp_a' + ] + + argnames_scalar = [ + 'kfldx' + ] + + argnames_nproma = [ + 'plsm', 'ldcum', 'ktype' + ] + + + with h5py.File(path, 'r') as f: + fields['KLON'] = f['KLON'][0] + fields['KLEV'] = f['KLEV'][0] + fields['PTSPHY'] = f['PTSPHY'][0] + kwargs['nlon'] = fields['KLON'] + + for argname in argnames_nlev: + fields[argname] = field_c_to_fortran((nblocks,nlev,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_nlevp: + fields[argname] = field_c_to_fortran((nblocks,nlev+1,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_withnclv: + fields[argname] = field_c_to_fortran((nblocks,NCLV,nlev,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_tend: + fields[argname] = field_c_to_fortran((nblocks,nlev,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_nproma: + fields[argname] = field_c_to_fortran((nblocks,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_scalar: + fields[argname] = f[argname.upper()][0] + + pack_buffer_using_tendencies(fields['buffer_tmp' ], + fields['tendency_tmp_a' ], + fields['tendency_tmp_t' ], + fields['tendency_tmp_q' ], + fields['tendency_tmp_cld']) + return fields + +def pack_buffer_using_tendencies(buffervar,tendency_a,tendency_t,tendency_q,tendency_cld): + """ + pack_buffer_using_tendencies serves as a packager of a single-variable + (that may consist of multiple fields, e.g. moist species) + tendencies into a continous buffer + """ + buffervar[:,:,0 ,:]=tendency_t [:,:,:] + buffervar[:,:,1 ,:]=tendency_a [:,:,:] + buffervar[:,:,2 ,:]=tendency_q [:,:,:] + buffervar[:,:,3:3+NCLV-1,:]=tendency_cld[:,:,0:NCLV-1,:] + +def unpack_buffer_to_tendencies(buffervar,tendency_a,tendency_t,tendency_q,tendency_cld): + """ + unpack_buffer_to_tendencies continuous unpacks buffer into a set of a single-variable + (that may consist of multiple fields, e.g. moist species) tendencies. + """ + tendency_t [:,:,:]=buffervar[:,:,0 ,:] + tendency_a [:,:,:]=buffervar[:,:,1 ,:] + tendency_q [:,:,:]=buffervar[:,:,2 ,:] + tendency_cld[:,:,0:NCLV-1,:]=buffervar[:,:,3:3+NCLV-1,:] + +def load_input_parameters(path,yrecldp,yrephli,yrmcst,yrethf): + """ + load_input_parameters returns: + - four parameter datatypes that are filled using names read from the reference .h5 file + """ + with h5py.File(path, 'r') as f: + tecldp_keys = [k for k in f.keys() if 'YRECLDP' in k] + for k in tecldp_keys: + attrkey = k.replace('YRECLDP_', '').lower() + setattr(yrecldp, attrkey, f[k][0]) + yrecldp.ncldql = 1 + yrecldp.ncldqi = 2 + yrecldp.ncldqr = 3 + yrecldp.ncldqs = 4 + yrecldp.ncldqv = 5 + + tephli_keys = [k for k in f.keys() if 'YREPHLI' in k] + for k in tephli_keys: + attrkey = k.replace('YREPHLI_', '').lower() + setattr(yrephli, attrkey, f[k][0]) + + tomcst_keys = ['RG', 'RD', 'RCPD', 'RETV', 'RLVTT', 'RLSTT', 'RLMLT', 'RTT', 'RV' ] + for k in tomcst_keys: + attrkey = k.lower() + setattr(yrmcst, attrkey, f[k][0]) + + toethf_keys = ['R2ES', 'R3LES', 'R3IES', 'R4LES', 'R4IES', 'R5LES', 'R5IES', + 'R5ALVCP', 'R5ALSCP', 'RALVDCP', 'RALSDCP', 'RALFDCP', + 'RTWAT', 'RTICE', 'RTICECU', 'RTWAT_RTICE_R', 'RTWAT_RTICECU_R', + 'RKOOP1', 'RKOOP2' ] + + for k in toethf_keys: + attrkey = k.lower() + setattr(yrethf, attrkey, f[k][0]) + +def convert_fortran_output_to_python (input_fields, **kwargs): + """ + convert_fortran_output_to_python converts Fortran-format fields that are to be compared to + reference results into a Python format. + """ + nproma = kwargs['nproma'] + nlev = kwargs['nlev'] + nblocks = kwargs['nblocks'] + + fields = OrderedDict() + argnames_nlev = [ + 'plude', 'pcovptot' + ] + + argnames_nlevp = [ + 'pfplsl', 'pfplsn', 'pfhpsl', 'pfhpsn', + 'pfsqlf', 'pfsqif' , 'pfcqnng', 'pfcqlng', + 'pfsqrf', 'pfsqsf' , 'pfcqrng', 'pfcqsng', + 'pfsqltur', 'pfsqitur' + ] + + argnames_nproma = [ + 'prainfrac_toprfz' + ] + + argnames_tend = [ + 'tendency_loc_a','tendency_loc_t','tendency_loc_q' + ] + + argnames_tend_cld = [ + 'tendency_loc_cld' + ] + + for argname in argnames_nlev: + fields[argname] = input_fields[argname] + + for argname in argnames_nlevp: + fields[argname] = input_fields[argname] + + for argname in argnames_nproma: + fields[argname] = input_fields[argname] + + for argname in argnames_tend: + fields[argname] = np.zeros(shape=(nproma,nlev,nblocks), order='F') + + for argname in argnames_tend_cld: + fields[argname] = np.zeros(shape=(nproma,nlev,NCLV,nblocks), order='F') + + + unpack_buffer_to_tendencies(input_fields ['buffer_loc'], + fields ['tendency_loc_a'], + fields ['tendency_loc_t'], + fields ['tendency_loc_q'], + fields ['tendency_loc_cld']) + + return fields + +def load_reference_fields (path, clsc=None, **kwargs): + """ + load_reference_fields loads reference results of Fortran computation from the .h5 file + """ + + if not clsc: + raise RuntimeError('[PyIface] Cannot load reference fields without CLOUDSC Fortran backend') + + nproma = kwargs['nproma'] + nlev = kwargs['nlev'] + nblocks = kwargs['nblocks'] + fields = OrderedDict() + + argnames_nlev = [ + 'plude', 'pcovptot' + ] + + argnames_nproma = [ + 'prainfrac_toprfz' + ] + + argnames_nlevp = [ + 'pfsqlf', 'pfsqif' , 'pfcqnng', 'pfcqlng', + 'pfsqrf', 'pfsqsf' , 'pfcqrng', 'pfcqsng', + 'pfsqltur', 'pfsqitur' , + 'pfplsl', 'pfplsn', 'pfhpsl', 'pfhpsn' + ] + + argnames_tend = [ + 'tendency_loc_a','tendency_loc_t','tendency_loc_q', + ] + + argnames_tend_cld = [ + 'tendency_loc_cld' + ] + + with h5py.File(path, 'r') as f: + fields['KLON'] = f['KLON'][0] + fields['KLEV'] = f['KLEV'][0] + kwargs['nlon'] = fields['KLON'] + + for argname in argnames_nlev: + fields[argname] = field_c_to_fortran((nblocks,nlev,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_nlevp: + fields[argname] = field_c_to_fortran((nblocks,nlev+1,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_nproma: + fields[argname] = field_c_to_fortran((nblocks,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_tend: + fields[argname] = field_c_to_fortran((nblocks,nlev,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + for argname in argnames_tend_cld: + fields[argname] = field_c_to_fortran((nblocks,NCLV,nlev,nproma), + f[argname.upper()], clsc=clsc, **kwargs) + + return fields + +def cloudsc_validate(fields, ref_fields): + """ + cloudsc_validate compares computed output of a Fortran kernel with reference results + previously read from the .h5 file. + """ + # List of refencece fields names in order + _field_names = [ + 'plude', 'pcovptot','prainfrac_toprfz', + 'pfsqlf', 'pfsqif' , 'pfcqnng', 'pfcqlng', + 'pfsqrf', 'pfsqsf' , 'pfcqrng', 'pfcqsng', + 'pfsqltur', 'pfsqitur' , + 'pfplsl', 'pfplsn', 'pfhpsl', 'pfhpsn', + 'tendency_loc_a', 'tendency_loc_q', 'tendency_loc_t', 'tendency_loc_cld' + ] + kidia = 1 + kfdia = 100 + ngptot = kfdia - kidia + 1 + + print(" Variable Dim MinValue MaxValue\ + AbsMaxErr AvgAbsErr/GP MaxRelErr-%") + for name in _field_names: + if len(fields[name].shape) == 1: + f = fields[name][kidia-1:kfdia] + ref = ref_fields[name][kidia-1:kfdia] + elif len(fields[name].shape) == 2: + f = fields[name][:,kidia-1:kfdia] + ref = ref_fields[name][:,kidia-1:kfdia] + elif len(fields[name].shape) == 3: + f = fields[name][:,:,kidia-1:kfdia] + ref = ref_fields[name][:,:,kidia-1:kfdia] + else: + f = fields[name] + ref = ref_fields[name] + zsum = np.sum(np.absolute(ref)) + zerrsum = np.sum(np.absolute(f - ref)) + zeps = np.finfo(np.float64).eps + print(' {fname:>20} {fmin:20.13e} {fmax:20.13e} {absmax:20.13e} '\ + ' {absavg:20.13e} {maxrel:20.13e}'.format( + fname=name.upper(), fmin=f.min(), fmax=f.max(), + absmax=np.absolute(f - ref).max(), + absavg=np.sum(np.absolute(f - ref)) / ngptot, + maxrel=0.0 if zerrsum < zeps else (zerrsum/(1.0+zsum) + if zsum < zeps else zerrsum/zsum) + ) + ) diff --git a/src/cloudsc_pyiface/src/pyiface/dynload.py b/src/cloudsc_pyiface/src/pyiface/dynload.py new file mode 100644 index 00000000..68cc6eed --- /dev/null +++ b/src/cloudsc_pyiface/src/pyiface/dynload.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +# (C) Copyright 2018- ECMWF. + +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +""" +Utility routines to dynamically load generated Python modules +""" + +import sys +from pathlib import Path +from importlib import import_module, invalidate_caches, reload + + +__all__ = ['load_module'] + + +def load_module(module, modpath=None): + """ + Utility routine to dynamically load the requested Python module. + """ + + modpath = Path.cwd() if modpath is None else modpath + modpath = str(Path(modpath).absolute()) + if modpath not in sys.path: + sys.path.insert(0, modpath) + if module in sys.modules: + reload(sys.modules[module]) + return sys.modules[module] + + # Trigger the actual module import + try: + return import_module(module) + except ModuleNotFoundError: + # If module caching interferes, try again with clean caches + invalidate_caches() + return import_module(module) diff --git a/src/common/module/expand_mod.F90 b/src/common/module/expand_mod.F90 index 109dd169..4b8b86f9 100644 --- a/src/common/module/expand_mod.F90 +++ b/src/common/module/expand_mod.F90 @@ -306,7 +306,6 @@ subroutine expand_r3(buffer, field, nlon, nproma, nlev, ndim, ngptot, nblocks) real(kind=jprb), intent(inout) :: field(nproma, nlev, ndim, nblocks) integer(kind=jpim), intent(in) :: nlon, nlev, ndim, nproma, ngptot, nblocks integer :: b, gidx, bsize, fidx, fend, bidx, bend - !$omp parallel do default(shared) private(b, gidx, bsize, fidx, fend, bidx, bend) schedule(runtime) do b=1, nblocks gidx = (b-1)*nproma + 1 ! Global starting index of the block in the general domain @@ -333,5 +332,4 @@ subroutine expand_r3(buffer, field, nlon, nproma, nlev, ndim, ngptot, nblocks) end do !$omp end parallel do end subroutine expand_r3 - end module expand_mod From 2a050c65ad31dedf6213eebd100bc9b3f74db832 Mon Sep 17 00:00:00 2001 From: Stefano Ubbiali Date: Mon, 24 Apr 2023 11:47:09 +0200 Subject: [PATCH 018/174] Adapt to GT4Py Cartesian. --- src/cloudsc_python/bootstrap_venv.sh | 3 --- src/cloudsc_python/requirements.txt | 2 +- src/cloudsc_python/setup.cfg | 2 +- src/cloudsc_python/src/cloudsc4py/__init__.py | 2 +- src/cloudsc_python/src/cloudsc4py/framework/components.py | 4 ++-- src/cloudsc_python/src/cloudsc4py/framework/stencil.py | 4 ++-- .../src/cloudsc4py/physics/_stencils/cloudsc.py | 2 +- .../src/cloudsc4py/physics/_stencils/cloudsc_split.py | 2 +- .../src/cloudsc4py/physics/_stencils/cuadjtq.py | 2 +- src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py | 2 +- src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py | 2 +- .../src/cloudsc4py/physics/_stencils/helpers.py | 2 +- 12 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/cloudsc_python/bootstrap_venv.sh b/src/cloudsc_python/bootstrap_venv.sh index 3cc211f2..b93e8c2c 100755 --- a/src/cloudsc_python/bootstrap_venv.sh +++ b/src/cloudsc_python/bootstrap_venv.sh @@ -31,9 +31,6 @@ function install() # install cloudsc4py pip install -e . - # install gt sources - python -m gt4py.gt_src_manager install - # setup gt4py cache mkdir -p gt_cache echo -e "\nexport GT_CACHE_ROOT=$PWD/gt_cache" >> "$VENV"/bin/activate diff --git a/src/cloudsc_python/requirements.txt b/src/cloudsc_python/requirements.txt index 0e2878cc..c2a4a4b8 100644 --- a/src/cloudsc_python/requirements.txt +++ b/src/cloudsc_python/requirements.txt @@ -1,5 +1,5 @@ click -gt4py[dace]@git+https://github.com/gridtools/gt4py.git#egg=gt4py +gt4py[dace]>=1.0.1 h5py numpy pandas diff --git a/src/cloudsc_python/setup.cfg b/src/cloudsc_python/setup.cfg index cdaac6f9..daaf1e95 100644 --- a/src/cloudsc_python/setup.cfg +++ b/src/cloudsc_python/setup.cfg @@ -30,7 +30,7 @@ package_dir = =src install_requires = click - gt4py[dace] @ git+https://github.com/GridTools/gt4py.git@master#egg=gt4py + gt4py[dace] >= 1.0.1 h5py numpy pandas diff --git a/src/cloudsc_python/src/cloudsc4py/__init__.py b/src/cloudsc_python/src/cloudsc4py/__init__.py index 0f97d401..fe3b92a3 100644 --- a/src/cloudsc_python/src/cloudsc4py/__init__.py +++ b/src/cloudsc_python/src/cloudsc4py/__init__.py @@ -11,7 +11,7 @@ import os -import gt4py.config as gt_config +import gt4py.cartesian.config as gt_config import cloudsc4py.physics diff --git a/src/cloudsc_python/src/cloudsc4py/framework/components.py b/src/cloudsc_python/src/cloudsc4py/framework/components.py index c3db2b57..a1e9c1bd 100644 --- a/src/cloudsc_python/src/cloudsc4py/framework/components.py +++ b/src/cloudsc_python/src/cloudsc4py/framework/components.py @@ -26,11 +26,11 @@ if TYPE_CHECKING: from typing import Any, Dict - from gt4py import StencilObject - from gt4py.storage import Storage + from gt4py.cartesian import StencilObject from sympl._core.typingx import PropertyDict from cloudsc4py.framework.grid import ComputationalGrid + from cloudsc4py.utils.typingx import Storage class ComputationalGridComponent: diff --git a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py index dc06f146..d4847281 100644 --- a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py +++ b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py @@ -12,12 +12,12 @@ from __future__ import annotations from typing import TYPE_CHECKING -from gt4py import gtscript +from gt4py.cartesian import gtscript if TYPE_CHECKING: from typing import Any, Dict - from gt4py import StencilObject + from gt4py.cartesian import StencilObject from cloudsc4py.framework.config import GT4PyConfig diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py index ecb4f48d..23f2c9c8 100644 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py +++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py @@ -11,7 +11,7 @@ from __future__ import annotations -from gt4py.gtscript import Field, IJ, K +from gt4py.cartesian.gtscript import Field, IJ, K from cloudsc4py.framework.stencil import stencil_collection from cloudsc4py.physics._stencils.cuadjtq import f_cuadjtq diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py index 0a7d27c0..f081fa2a 100644 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py +++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py @@ -11,7 +11,7 @@ from __future__ import annotations -from gt4py.gtscript import Field, IJ, K +from gt4py.cartesian.gtscript import Field, IJ, K from cloudsc4py.framework.stencil import stencil_collection from cloudsc4py.physics._stencils.cuadjtq import f_cuadjtq diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py index 5b358fe1..27d11ca1 100644 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py +++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -from gt4py import gtscript +from gt4py.cartesian import gtscript from cloudsc4py.framework.stencil import function_collection from cloudsc4py.physics._stencils.fcttre import f_foedem, f_foeewm, f_foeldcpm diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py index 9be00540..faeaa00a 100644 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py +++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -from gt4py import gtscript +from gt4py.cartesian import gtscript from cloudsc4py.framework.stencil import function_collection from cloudsc4py.physics._stencils.fcttre import f_foeeice, f_foeeliq diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py index 0a304421..ca966aff 100644 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py +++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -from gt4py import gtscript +from gt4py.cartesian import gtscript from cloudsc4py.framework.stencil import function_collection from cloudsc4py.utils.f2py import ported_function diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py index defc4c63..00cb8f3f 100644 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py +++ b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py @@ -9,7 +9,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -from gt4py import gtscript +from gt4py.cartesian import gtscript from cloudsc4py.framework.stencil import function_collection From 1a09b7df339cbca587ac8d6c90d9308176684f52 Mon Sep 17 00:00:00 2001 From: Stefano Ubbiali Date: Mon, 24 Apr 2023 16:48:44 +0200 Subject: [PATCH 019/174] Improve user control of floating point precision. --- src/cloudsc_python/drivers/config.py | 9 +++++++-- src/cloudsc_python/drivers/run.py | 8 ++++++++ src/cloudsc_python/drivers/run_split.py | 8 ++++++++ src/cloudsc_python/src/cloudsc4py/framework/config.py | 9 +++++++++ src/cloudsc_python/src/cloudsc4py/framework/stencil.py | 2 ++ src/cloudsc_python/src/cloudsc4py/utils/iox.py | 2 +- 6 files changed, 35 insertions(+), 3 deletions(-) diff --git a/src/cloudsc_python/drivers/config.py b/src/cloudsc_python/drivers/config.py index b9353d12..ac7e4e5d 100644 --- a/src/cloudsc_python/drivers/config.py +++ b/src/cloudsc_python/drivers/config.py @@ -14,7 +14,7 @@ from os.path import dirname, join, normpath, splitext from pydantic import BaseModel, validator import socket -from typing import Optional +from typing import Literal, Optional from cloudsc4py.framework.config import DataTypes, GT4PyConfig @@ -107,6 +107,11 @@ def with_num_runs(self, num_runs: Optional[int]) -> PythonConfig: args["num_runs"] = num_runs return PythonConfig(**args) + def with_precision(self, precision: Literal["double", "single"]) -> PythonConfig: + args = self.dict() + args["data_types"] = self.data_types.with_precision(precision) + return PythonConfig(**args) + def with_validation(self, enabled: bool) -> PythonConfig: args = self.dict() args["enable_validation"] = enabled @@ -120,7 +125,7 @@ def with_validation(self, enabled: bool) -> PythonConfig: input_file=join(config_files_dir, "input.h5"), reference_file=join(config_files_dir, "reference.h5"), num_runs=15, - data_types=DataTypes(bool=bool, float=np.float64, int=int), + data_types=DataTypes(bool=bool, float=np.float64, int=np.int64), gt4py_config=GT4PyConfig(backend="numpy", rebuild=False, validate_args=True, verbose=True), sympl_enable_checks=True, ) diff --git a/src/cloudsc_python/drivers/run.py b/src/cloudsc_python/drivers/run.py index ee31f4b1..607ca00a 100644 --- a/src/cloudsc_python/drivers/run.py +++ b/src/cloudsc_python/drivers/run.py @@ -130,6 +130,12 @@ def core(config: PythonConfig, io_config: IOConfig, cloudsc_cls: Type) -> None: default=1, help="Number of executions.\n\nDefault: 1.", ) +@click.option( + "--precision", + type=str, + default="double", + help="Select either `double` (default) or `single` precision.", +) @click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).") @click.option( "--output-csv-file", @@ -149,6 +155,7 @@ def main( enable_validation: bool, num_cols: Optional[int], num_runs: Optional[int], + precision: str, host_alias: Optional[str], output_csv_file: Optional[str], output_csv_file_stencils: Optional[str], @@ -164,6 +171,7 @@ def main( .with_validation(enable_validation) .with_num_cols(num_cols) .with_num_runs(num_runs) + .with_precision(precision) ) io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias) core(config, io_config, cloudsc_cls=Cloudsc) diff --git a/src/cloudsc_python/drivers/run_split.py b/src/cloudsc_python/drivers/run_split.py index 188e01aa..f29c9b5f 100644 --- a/src/cloudsc_python/drivers/run_split.py +++ b/src/cloudsc_python/drivers/run_split.py @@ -51,6 +51,12 @@ default=1, help="Number of executions.\n\nDefault: 1.", ) +@click.option( + "--precision", + type=str, + default="double", + help="Select either `double` (default) or `single` precision.", +) @click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).") @click.option( "--output-csv-file", @@ -70,6 +76,7 @@ def main( enable_validation: bool, num_cols: Optional[int], num_runs: Optional[int], + precision: str, host_alias: Optional[str], output_csv_file: Optional[str], output_csv_file_stencils: Optional[str], @@ -85,6 +92,7 @@ def main( .with_validation(enable_validation) .with_num_cols(num_cols) .with_num_runs(num_runs) + .with_precision(precision) ) io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias) core(config, io_config, cloudsc_cls=Cloudsc) diff --git a/src/cloudsc_python/src/cloudsc4py/framework/config.py b/src/cloudsc_python/src/cloudsc4py/framework/config.py index 8c63fe50..b4607e84 100644 --- a/src/cloudsc_python/src/cloudsc4py/framework/config.py +++ b/src/cloudsc_python/src/cloudsc4py/framework/config.py @@ -10,6 +10,7 @@ # nor does it submit to any jurisdiction. from __future__ import annotations +import numpy as np from pydantic import BaseModel, validator from typing import Any, Dict, Optional, Union, Type @@ -21,6 +22,14 @@ class DataTypes(BaseModel): float: Type int: Type + def with_precision(self, precision: Literal["double", "single"]) -> DataTypes: + if precision == "double": + return DataTypes(bool=bool, float=np.float64, int=np.int64) + elif precision == "single": + return DataTypes(bool=bool, float=np.float32, int=np.int32) + else: + raise ValueError("Either `double` or `single` precision supported.") + class GT4PyConfig(BaseModel): """Gather options controlling the compilation and execution of the code generated by GT4Py.""" diff --git a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py index d4847281..4517262e 100644 --- a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py +++ b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py @@ -62,6 +62,8 @@ def compile_stencil( definition = stencil_info["definition"] dtypes = gt4py_config.dtypes.dict() + dtypes[float] = gt4py_config.dtypes.float + dtypes[int] = gt4py_config.dtypes.int externals = externals or {} kwargs = gt4py_config.backend_opts.copy() diff --git a/src/cloudsc_python/src/cloudsc4py/utils/iox.py b/src/cloudsc_python/src/cloudsc4py/utils/iox.py index be1efd68..53307c1b 100644 --- a/src/cloudsc_python/src/cloudsc4py/utils/iox.py +++ b/src/cloudsc_python/src/cloudsc4py/utils/iox.py @@ -228,7 +228,7 @@ def get_nlon(self) -> int: return self.f["KLON"][0] def get_timestep(self) -> timedelta: - return timedelta(seconds=self._get_parameter_f("PTSPHY")) + return timedelta(seconds=float(self._get_parameter_f("PTSPHY"))) @ported_method(from_file="common/module/yoecldp.F90", from_line=86, to_line=91) def get_yoecldp_parameters(self) -> YoecldpParameters: From 2cba075cc4da3abb88338bb3901cbfa15ec211b2 Mon Sep 17 00:00:00 2001 From: Stefano Ubbiali Date: Mon, 24 Apr 2023 16:53:16 +0200 Subject: [PATCH 020/174] Fix print statement. --- src/cloudsc_python/drivers/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudsc_python/drivers/run.py b/src/cloudsc_python/drivers/run.py index 607ca00a..7a844f86 100644 --- a/src/cloudsc_python/drivers/run.py +++ b/src/cloudsc_python/drivers/run.py @@ -57,7 +57,7 @@ def core(config: PythonConfig, io_config: IOConfig, cloudsc_cls: Type) -> None: for i in range(config.num_runs): with timing(f"run_{i}") as timer: cloudsc(state, dt, out_tendencies=tends, out_diagnostics=diags) - runtimes.append(timer.get_time(f"run_{i}") * 1000) + runtimes.append(timer.get_time(f"run_{i}")) runtime_mean, runtime_stddev = print_performance(runtimes) From 1c1046d33cdc5e3cbfd43b812de2a5732a4ebe95 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Sat, 4 Feb 2023 05:31:51 +0000 Subject: [PATCH 021/174] Python: Remove restrictive pre-commit hooks --- src/cloudsc_python/.pre-commit-config.yaml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 src/cloudsc_python/.pre-commit-config.yaml diff --git a/src/cloudsc_python/.pre-commit-config.yaml b/src/cloudsc_python/.pre-commit-config.yaml deleted file mode 100644 index 6ff44b0d..00000000 --- a/src/cloudsc_python/.pre-commit-config.yaml +++ /dev/null @@ -1,22 +0,0 @@ -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 - hooks: - - id: check-yaml - - id: check-added-large-files - - id: check-case-conflict - - id: check-json - - id: check-merge-conflict - - id: debug-statements - - id: end-of-file-fixer - - id: fix-encoding-pragma - - id: requirements-txt-fixer - - id: trailing-whitespace -- repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.8.0 - hooks: - - id: rst-backticks -- repo: https://github.com/psf/black - rev: 22.6.0 - hooks: - - id: black From a412e6e60bcf869f65c3ad8444153d776ec12294 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 8 Feb 2023 09:44:59 +0000 Subject: [PATCH 022/174] Python-f2py: Add secondary package cloudscf2py to Python package --- src/cloudsc_python/setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cloudsc_python/setup.cfg b/src/cloudsc_python/setup.cfg index daaf1e95..6fb86478 100644 --- a/src/cloudsc_python/setup.cfg +++ b/src/cloudsc_python/setup.cfg @@ -27,7 +27,8 @@ packages = find: include_package_data = True python_requires = >= 3.7 package_dir = - =src + cloudsc4py = src/cloudsc4py + cloudscf2py = src/cloudscf2py install_requires = click gt4py[dace] >= 1.0.1 From 2b73d70456c14e3d2fc34071211b3f9a197fcf5d Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 8 Feb 2023 09:46:46 +0000 Subject: [PATCH 023/174] Python-f2py: Add draft driver with Loki-generation capabilities --- src/cloudsc_python/drivers/cloudsc_f2py.py | 100 + .../src/cloudscf2py/__init__.py | 13 + .../src/cloudscf2py/cloudsc.F90 | 2923 +++++++++++++++++ .../src/cloudscf2py/cloudsc_py.py | 2570 +++++++++++++++ .../src/cloudscf2py/include/abor1.intfb.h | 14 + .../src/cloudscf2py/include/fccld.base.h | 27 + .../src/cloudscf2py/include/fccld.func.h | 27 + .../src/cloudscf2py/include/fccld.ydthf.h | 3 + .../src/cloudscf2py/include/fcttre.base.h | 172 + .../src/cloudscf2py/include/fcttre.func.h | 174 + .../src/cloudscf2py/include/fcttre.ycst.h | 5 + src/cloudsc_python/src/cloudscf2py/inputs.py | 160 + .../src/cloudscf2py/yoecldp.F90 | 371 +++ src/cloudsc_python/src/cloudscf2py/yoethf.F90 | 160 + src/cloudsc_python/src/cloudscf2py/yomcst.F90 | 338 ++ 15 files changed, 7057 insertions(+) create mode 100644 src/cloudsc_python/drivers/cloudsc_f2py.py create mode 100644 src/cloudsc_python/src/cloudscf2py/__init__.py create mode 100644 src/cloudsc_python/src/cloudscf2py/cloudsc.F90 create mode 100644 src/cloudsc_python/src/cloudscf2py/cloudsc_py.py create mode 100644 src/cloudsc_python/src/cloudscf2py/include/abor1.intfb.h create mode 100644 src/cloudsc_python/src/cloudscf2py/include/fccld.base.h create mode 100644 src/cloudsc_python/src/cloudscf2py/include/fccld.func.h create mode 100644 src/cloudsc_python/src/cloudscf2py/include/fccld.ydthf.h create mode 100644 src/cloudsc_python/src/cloudscf2py/include/fcttre.base.h create mode 100644 src/cloudsc_python/src/cloudscf2py/include/fcttre.func.h create mode 100644 src/cloudsc_python/src/cloudscf2py/include/fcttre.ycst.h create mode 100644 src/cloudsc_python/src/cloudscf2py/inputs.py create mode 100644 src/cloudsc_python/src/cloudscf2py/yoecldp.F90 create mode 100644 src/cloudsc_python/src/cloudscf2py/yoethf.F90 create mode 100644 src/cloudsc_python/src/cloudscf2py/yomcst.F90 diff --git a/src/cloudsc_python/drivers/cloudsc_f2py.py b/src/cloudsc_python/drivers/cloudsc_f2py.py new file mode 100644 index 00000000..2393881d --- /dev/null +++ b/src/cloudsc_python/drivers/cloudsc_f2py.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- + +# (C) Copyright 2018- ECMWF. + +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +import click +from pathlib import Path + + +def loki_generate_kernel(source_path, out_path, include_dir=None): + from loki import Sourcefile, flatten + from loki.transform import FortranPythonTransformation + + source_dir = source_path.parent + headers = ['yoethf.F90', 'yoecldp.F90', 'yomcst.F90'] + definitions = flatten( + Sourcefile.from_file(source_dir/header).modules for header in headers + ) + + f2py = FortranPythonTransformation( + with_dace=False, suffix='_py', invert_indices=True + ) + + # Parse original driver and kernel routine, and enrich the driver + kernel = Sourcefile.from_file( + source_path, definitions=definitions, + includes=source_dir/'include', preprocess=True + ) + f2py.apply(kernel, role='kernel', path=out_path) + + +def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): + from cloudscf2py import ( + load_input_fields, load_input_parameters, load_reference_fields, + cloudsc_py + ) + + fields = load_input_fields(path=input_path) + + yrecldp, yrmcst, yrethf, yrephli, yrecld = load_input_parameters(path=input_path) + + cloudsc_args = { + + } + cloudsc_args.update( (k.lower(), v) for k, v in fields.items() ) + + cloudsc_py( + kidia=1, kfdia=ngptot, **cloudsc_args, + yrecldp=yrecldp, ydcst=yrmcst, ydthf=yrethf, + ) + + reference = load_reference_fields(path=reference_path) + + +@click.command() +@click.option( + '--nthreads', default=1, + help='Number of OpenMP threads to use' +) +@click.option( + '--ngptot', default=100, + help='Total number of columns to use for benchamrking' +) +@click.option( + '--nproma', default=32, + help='Number of columns per block (NPROMA)' +) +@click.option( + '--generate/--no-generate', default=False, + help='(Re)generate kernel via Loki-Fortran-Python transform' +) +def dwarf_cloudsc(nthreads, ngptot, nproma, generate): + """ + Run a Python version of CLOUDSC and validate against reference data + """ + + here = Path(__file__).parent.absolute() + cloudsc_root = here.parent.parent.parent + cloudsc_f2py = here.parent/'src/cloudscf2py' + input_path = cloudsc_root/'config-files/input.h5' + reference_path = cloudsc_root/'config-files/reference.h5' + + if generate: + loki_generate_kernel( + source_path=cloudsc_f2py/'cloudsc.F90', out_path=cloudsc_f2py, + include_dir=cloudsc_root/'src/common/include' + ) + + run_cloudsc_kernel( + nthreads, ngptot, nproma, input_path=input_path, reference_path=reference_path + ) + + +if __name__ == "__main__": + dwarf_cloudsc() diff --git a/src/cloudsc_python/src/cloudscf2py/__init__.py b/src/cloudsc_python/src/cloudscf2py/__init__.py new file mode 100644 index 00000000..ec7382ab --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- + +# (C) Copyright 2018- ECMWF. + +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + + +from cloudscf2py.inputs import * +from cloudscf2py.cloudsc_py import * diff --git a/src/cloudsc_python/src/cloudscf2py/cloudsc.F90 b/src/cloudsc_python/src/cloudscf2py/cloudsc.F90 new file mode 100644 index 00000000..c979ceab --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/cloudsc.F90 @@ -0,0 +1,2923 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE CLOUDSC_MOD + +CONTAINS + +SUBROUTINE CLOUDSC & + !---input + & (KIDIA, KFDIA, KLON, KLEV,& + & PTSPHY, & + & PT, PQ, & + & TENDENCY_TMP_T, TENDENCY_TMP_Q, TENDENCY_TMP_A, TENDENCY_TMP_CLD, & + & TENDENCY_LOC_T, TENDENCY_LOC_Q, TENDENCY_LOC_A, TENDENCY_LOC_CLD, & + & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, & + & PHRSW, PHRLW,& + & PVERVEL, PAP, PAPH,& + & PLSM, LDCUM, KTYPE, & + & PLU, PLUDE, PSNDE, PMFU, PMFD,& + !---prognostic fields + & PA,& + & PCLV, & + & PSUPSAT,& +!-- arrays for aerosol-cloud interactions +!!! & PQAER, KAER, & + & PLCRIT_AER,PICRIT_AER,& + & PRE_ICE,& + & PCCN, PNICE,& + !---diagnostic output + & PCOVPTOT, PRAINFRAC_TOPRFZ,& + !---resulting fluxes + & PFSQLF, PFSQIF , PFCQNNG, PFCQLNG,& + & PFSQRF, PFSQSF , PFCQRNG, PFCQSNG,& + & PFSQLTUR, PFSQITUR , & + & PFPLSL, PFPLSN, PFHPSL, PFHPSN,& + & YDCST, YDTHF, YRECLDP ) + ! & YRECLDP) + +!=============================================================================== +!**** *CLOUDSC* - ROUTINE FOR PARAMATERIZATION OF CLOUD PROCESSES +! FOR PROGNOSTIC CLOUD SCHEME +!! +! M.Tiedtke, C.Jakob, A.Tompkins, R.Forbes (E.C.M.W.F.) +!! +! PURPOSE +! ------- +! THIS ROUTINE UPDATES THE CONV/STRAT CLOUD FIELDS. +! THE FOLLOWING PROCESSES ARE CONSIDERED: +! - Detrainment of cloud water from convective updrafts +! - Evaporation/condensation of cloud water in connection +! with heating/cooling such as by subsidence/ascent +! - Erosion of clouds by turbulent mixing of cloud air +! with unsaturated environmental air +! - Deposition onto ice when liquid water present (Bergeron-Findeison) +! - Conversion of cloud water into rain (collision-coalescence) +! - Conversion of cloud ice to snow (aggregation) +! - Sedimentation of rain, snow and ice +! - Evaporation of rain and snow +! - Melting of snow and ice +! - Freezing of liquid and rain +! Note: Turbulent transports of s,q,u,v at cloud tops due to +! buoyancy fluxes and lw radiative cooling are treated in +! the VDF scheme +!! +! INTERFACE. +! ---------- +! *CLOUDSC* IS CALLED FROM *CALLPAR* +! THE ROUTINE TAKES ITS INPUT FROM THE LONG-TERM STORAGE: +! T,Q,L,PHI AND DETRAINMENT OF CLOUD WATER FROM THE +! CONVECTIVE CLOUDS (MASSFLUX CONVECTION SCHEME), BOUNDARY +! LAYER TURBULENT FLUXES OF HEAT AND MOISTURE, RADIATIVE FLUXES, +! OMEGA. +! IT RETURNS ITS OUTPUT TO: +! 1.MODIFIED TENDENCIES OF MODEL VARIABLES T AND Q +! AS WELL AS CLOUD VARIABLES L AND C +! 2.GENERATES PRECIPITATION FLUXES FROM STRATIFORM CLOUDS +!! +! EXTERNALS. +! ---------- +! NONE +!! +! MODIFICATIONS. +! ------------- +! M. TIEDTKE E.C.M.W.F. 8/1988, 2/1990 +! CH. JAKOB E.C.M.W.F. 2/1994 IMPLEMENTATION INTO IFS +! A.TOMPKINS E.C.M.W.F. 2002 NEW NUMERICS +! 01-05-22 : D.Salmond Safety modifications +! 02-05-29 : D.Salmond Optimisation +! 03-01-13 : J.Hague MASS Vector Functions J.Hague +! 03-10-01 : M.Hamrud Cleaning +! 04-12-14 : A.Tompkins New implicit solver and physics changes +! 04-12-03 : A.Tompkins & M.Ko"hler moist PBL +! G.Mozdzynski 09-Jan-2006 EXP security fix +! 19-01-09 : P.Bechtold Changed increased RCLDIFF value for KTYPE=2 +! 07-07-10 : A.Tompkins/R.Forbes 4-Phase flexible microphysics +! 01-03-11 : R.Forbes Mixed phase changes and tidy up +! 01-10-11 : R.Forbes Melt ice to rain, allow rain to freeze +! 01-10-11 : R.Forbes Limit supersat to avoid excessive values +! 31-10-11 : M.Ahlgrimm Add rain, snow and PEXTRA to DDH output +! 17-02-12 : F.Vana Simplified/optimized LU factorization +! 18-05-12 : F.Vana Cleaning + better support of sequential physics +! N.Semane+P.Bechtold 04-10-2012 Add RVRFACTOR factor for small planet +! 01-02-13 : R.Forbes New params of autoconv/acc,rain evap,snow riming +! 15-03-13 : F. Vana New dataflow + more tendencies from the first call +! K. Yessad (July 2014): Move some variables. +! F. Vana 05-Mar-2015 Support for single precision +! 15-01-15 : R.Forbes Added new options for snow evap & ice deposition +! 10-01-15 : R.Forbes New physics for rain freezing +! 23-10-14 : P. Bechtold remove zeroing of convection arrays +! +! SWITCHES. +! -------- +!! +! MODEL PARAMETERS +! ---------------- +! RCLDIFF: PARAMETER FOR EROSION OF CLOUDS +! RCLCRIT_SEA: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER SEA +! RCLCRIT_LAND: THRESHOLD VALUE FOR RAIN AUTOCONVERSION OVER LAND +! RLCRITSNOW: THRESHOLD VALUE FOR SNOW AUTOCONVERSION +! RKCONV: PARAMETER FOR AUTOCONVERSION OF CLOUDS (KESSLER) +! RCLDMAX: MAXIMUM POSSIBLE CLW CONTENT (MASON,1971) +!! +! REFERENCES. +! ---------- +! TIEDTKE MWR 1993 +! JAKOB PhD 2000 +! GREGORY ET AL. QJRMS 2000 +! TOMPKINS ET AL. QJRMS 2007 +!! +!=============================================================================== + +USE PARKIND1 , ONLY : JPIM, JPRB +USE YOMPHYDER ,ONLY : STATE_TYPE +! USE YOMCST , ONLY : RG, RD, RCPD, RETV, RLVTT, RLSTT, RLMLT, RTT, RV +! USE YOETHF , ONLY : R2ES, R3LES, R3IES, R4LES, R4IES, R5LES, R5IES, & +! & R5ALVCP, R5ALSCP, RALVDCP, RALSDCP, RALFDCP, RTWAT, RTICE, RTICECU, & +! & RTWAT_RTICE_R, RTWAT_RTICECU_R, RKOOP1, RKOOP2 +! USE YOECLDP , ONLY : TECLDP, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, NCLV +! USE YOECLDP , ONLY : TECLDP, NCLDQV, NCLDQL, NCLDQR, NCLDQI, NCLDQS, NCLV +USE YOECLDP , ONLY : TECLDP +USE YOMCST , ONLY : TOMCST +USE YOETHF , ONLY : TOETHF + +! USE FCTTRE_MOD, ONLY: FOEDELTA, FOEALFA, FOEEWM, FOEEICE, FOEELIQ, FOELDCP, FOELDCPM, FOEDEM +! USE FCCLD_MOD, ONLY : FOKOOP + + +IMPLICIT NONE + +!------------------------------------------------------------------------------- +! Declare input/output arguments +!------------------------------------------------------------------------------- + +INTEGER(KIND=JPIM),PARAMETER :: NCLV=5 ! number of microphysics variables +INTEGER(KIND=JPIM),PARAMETER :: NCLDQL=1 ! liquid cloud water +INTEGER(KIND=JPIM),PARAMETER :: NCLDQI=2 ! ice cloud water +INTEGER(KIND=JPIM),PARAMETER :: NCLDQR=3 ! rain water +INTEGER(KIND=JPIM),PARAMETER :: NCLDQS=4 ! snow +INTEGER(KIND=JPIM),PARAMETER :: NCLDQV=5 ! vapour + + +! PLCRIT_AER : critical liquid mmr for rain autoconversion process +! PICRIT_AER : critical liquid mmr for snow autoconversion process +! PRE_LIQ : liq Re +! PRE_ICE : ice Re +! PCCN : liquid cloud condensation nuclei +! PNICE : ice number concentration (cf. CCN) + +REAL(KIND=JPRB) ,INTENT(IN) :: PLCRIT_AER(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PICRIT_AER(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PRE_ICE(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: PCCN(KLON,KLEV) ! liquid cloud condensation nuclei +REAL(KIND=JPRB) ,INTENT(IN) :: PNICE(KLON,KLEV) ! ice number concentration (cf. CCN) + +INTEGER(KIND=JPIM),INTENT(IN) :: KLON ! Number of grid points +INTEGER(KIND=JPIM),INTENT(IN) :: KLEV ! Number of levels +INTEGER(KIND=JPIM),INTENT(IN) :: KIDIA +INTEGER(KIND=JPIM),INTENT(IN) :: KFDIA +REAL(KIND=JPRB) ,INTENT(IN) :: PTSPHY ! Physics timestep +REAL(KIND=JPRB) ,INTENT(IN) :: PT(KLON,KLEV) ! T at start of callpar +REAL(KIND=JPRB) ,INTENT(IN) :: PQ(KLON,KLEV) ! Q at start of callpar +REAL(KIND=JPRB) ,INTENT(IN) :: TENDENCY_TMP_T(KLON, KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: TENDENCY_TMP_Q(KLON, KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: TENDENCY_TMP_A(KLON, KLEV) +REAL(KIND=JPRB) ,INTENT(IN) :: TENDENCY_TMP_CLD(KLON, KLEV, NCLV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_T(KLON, KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_Q(KLON, KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_A(KLON, KLEV) +REAL(KIND=JPRB) ,INTENT(INOUT) :: TENDENCY_LOC_CLD(KLON, KLEV, NCLV) +REAL(KIND=JPRB) ,INTENT(IN) :: PVFA(KLON,KLEV) ! CC from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFL(KLON,KLEV) ! Liq from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PVFI(KLON,KLEV) ! Ice from VDF scheme +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNA(KLON,KLEV) ! CC from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNL(KLON,KLEV) ! Liq from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PDYNI(KLON,KLEV) ! Liq from Dynamics +REAL(KIND=JPRB) ,INTENT(IN) :: PHRSW(KLON,KLEV) ! Short-wave heating rate +REAL(KIND=JPRB) ,INTENT(IN) :: PHRLW(KLON,KLEV) ! Long-wave heating rate +REAL(KIND=JPRB) ,INTENT(IN) :: PVERVEL(KLON,KLEV) !Vertical velocity +REAL(KIND=JPRB) ,INTENT(IN) :: PAP(KLON,KLEV) ! Pressure on full levels +REAL(KIND=JPRB) ,INTENT(IN) :: PAPH(KLON,KLEV+1)! Pressure on half levels +REAL(KIND=JPRB) ,INTENT(IN) :: PLSM(KLON) ! Land fraction (0-1) +LOGICAL ,INTENT(IN) :: LDCUM(KLON) ! Convection active +INTEGER(KIND=JPIM),INTENT(IN) :: KTYPE(KLON) ! Convection type 0,1,2 +REAL(KIND=JPRB) ,INTENT(IN) :: PLU(KLON,KLEV) ! Conv. condensate +REAL(KIND=JPRB) ,INTENT(INOUT) :: PLUDE(KLON,KLEV) ! Conv. detrained water +REAL(KIND=JPRB) ,INTENT(IN) :: PSNDE(KLON,KLEV) ! Conv. detrained snow +REAL(KIND=JPRB) ,INTENT(IN) :: PMFU(KLON,KLEV) ! Conv. mass flux up +REAL(KIND=JPRB) ,INTENT(IN) :: PMFD(KLON,KLEV) ! Conv. mass flux down +REAL(KIND=JPRB) ,INTENT(IN) :: PA(KLON,KLEV) ! Original Cloud fraction (t) + +REAL(KIND=JPRB) ,INTENT(IN) :: PCLV(KLON,KLEV,NCLV) + + ! Supersat clipped at previous time level in SLTEND +REAL(KIND=JPRB) ,INTENT(IN) :: PSUPSAT(KLON,KLEV) +REAL(KIND=JPRB) ,INTENT(OUT) :: PCOVPTOT(KLON,KLEV) ! Precip fraction +REAL(KIND=JPRB) ,INTENT(OUT) :: PRAINFRAC_TOPRFZ(KLON) +! Flux diagnostics for DDH budget +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQLF(KLON,KLEV+1) ! Flux of liquid +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQIF(KLON,KLEV+1) ! Flux of ice +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQLNG(KLON,KLEV+1) ! -ve corr for liq +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQNNG(KLON,KLEV+1) ! -ve corr for ice +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQRF(KLON,KLEV+1) ! Flux diagnostics +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQSF(KLON,KLEV+1) ! for DDH, generic +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQRNG(KLON,KLEV+1) ! rain +REAL(KIND=JPRB) ,INTENT(OUT) :: PFCQSNG(KLON,KLEV+1) ! snow +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQLTUR(KLON,KLEV+1) ! liquid flux due to VDF +REAL(KIND=JPRB) ,INTENT(OUT) :: PFSQITUR(KLON,KLEV+1) ! ice flux due to VDF +REAL(KIND=JPRB) ,INTENT(OUT) :: PFPLSL(KLON,KLEV+1) ! liq+rain sedim flux +REAL(KIND=JPRB) ,INTENT(OUT) :: PFPLSN(KLON,KLEV+1) ! ice+snow sedim flux +REAL(KIND=JPRB) ,INTENT(OUT) :: PFHPSL(KLON,KLEV+1) ! Enthalpy flux for liq +REAL(KIND=JPRB) ,INTENT(OUT) :: PFHPSN(KLON,KLEV+1) ! Enthalp flux for ice + +! TYPE(TECLDP), INTENT(INOUT) :: YRECLDP + +TYPE(TOMCST) ,INTENT(IN) :: YDCST +TYPE(TOETHF) ,INTENT(IN) :: YDTHF +TYPE(TECLDP) ,INTENT(IN) :: YRECLDP + +!------------------------------------------------------------------------------- +! Declare local variables +!------------------------------------------------------------------------------- + +REAL(KIND=JPRB) :: & +! condensation and evaporation terms + & ZLCOND1(KLON), ZLCOND2(KLON),& + & ZLEVAP, ZLEROS,& + & ZLEVAPL(KLON), ZLEVAPI(KLON),& +! autoconversion terms + & ZRAINAUT(KLON), ZSNOWAUT(KLON), & + & ZLIQCLD(KLON), ZICECLD(KLON) +REAL(KIND=JPRB) :: ZFOKOOP(KLON) +REAL(KIND=JPRB) :: ZFOEALFA(KLON,KLEV+1) +REAL(KIND=JPRB) :: ZICENUCLEI(KLON) ! number concentration of ice nuclei + +REAL(KIND=JPRB) :: ZLICLD(KLON) +REAL(KIND=JPRB) :: ZACOND +REAL(KIND=JPRB) :: ZAEROS +REAL(KIND=JPRB) :: ZLFINALSUM(KLON) +REAL(KIND=JPRB) :: ZDQS(KLON) +REAL(KIND=JPRB) :: ZTOLD(KLON) +REAL(KIND=JPRB) :: ZQOLD(KLON) +REAL(KIND=JPRB) :: ZDTGDP(KLON) +REAL(KIND=JPRB) :: ZRDTGDP(KLON) +REAL(KIND=JPRB) :: ZTRPAUS(KLON) +REAL(KIND=JPRB) :: ZCOVPCLR(KLON) +REAL(KIND=JPRB) :: ZPRECLR +REAL(KIND=JPRB) :: ZCOVPTOT(KLON) +REAL(KIND=JPRB) :: ZCOVPMAX(KLON) +REAL(KIND=JPRB) :: ZQPRETOT(KLON) +REAL(KIND=JPRB) :: ZDPEVAP +REAL(KIND=JPRB) :: ZDTFORC +REAL(KIND=JPRB) :: ZDTDIAB +REAL(KIND=JPRB) :: ZTP1(KLON,KLEV) +REAL(KIND=JPRB) :: ZLDEFR(KLON) +REAL(KIND=JPRB) :: ZLDIFDT(KLON) +REAL(KIND=JPRB) :: ZDTGDPF(KLON) +REAL(KIND=JPRB) :: ZLCUST(KLON,NCLV) +REAL(KIND=JPRB) :: ZACUST(KLON) +REAL(KIND=JPRB) :: ZMF(KLON) + +REAL(KIND=JPRB) :: ZRHO(KLON) +REAL(KIND=JPRB) :: ZTMP1(KLON),ZTMP2(KLON),ZTMP3(KLON) +REAL(KIND=JPRB) :: ZTMP4(KLON),ZTMP5(KLON),ZTMP6(KLON),ZTMP7(KLON) +REAL(KIND=JPRB) :: ZALFAWM(KLON) + +! Accumulators of A,B,and C factors for cloud equations +REAL(KIND=JPRB) :: ZSOLAB(KLON) ! -ve implicit CC +REAL(KIND=JPRB) :: ZSOLAC(KLON) ! linear CC +REAL(KIND=JPRB) :: ZANEW +REAL(KIND=JPRB) :: ZANEWM1(KLON) + +REAL(KIND=JPRB) :: ZGDP(KLON) + +!---for flux calculation +REAL(KIND=JPRB) :: ZDA(KLON) +REAL(KIND=JPRB) :: ZLI(KLON,KLEV), ZA(KLON,KLEV) +REAL(KIND=JPRB) :: ZAORIG(KLON,KLEV) ! start of scheme value for CC + +LOGICAL :: LLFLAG(KLON) +LOGICAL :: LLO1 + +INTEGER(KIND=JPIM) :: ICALL, IK, JK, JL, JM, JN, JO, JLEN, IS + +REAL(KIND=JPRB) :: ZDP(KLON), ZPAPHD(KLON) + +REAL(KIND=JPRB) :: ZALFA +! & ZALFACU, ZALFALS +REAL(KIND=JPRB) :: ZALFAW +REAL(KIND=JPRB) :: ZBETA,ZBETA1 +!REAL(KIND=JPRB) :: ZBOTT +REAL(KIND=JPRB) :: ZCFPR +REAL(KIND=JPRB) :: ZCOR +REAL(KIND=JPRB) :: ZCDMAX +REAL(KIND=JPRB) :: ZMIN(KLON) +REAL(KIND=JPRB) :: ZLCONDLIM +REAL(KIND=JPRB) :: ZDENOM +REAL(KIND=JPRB) :: ZDPMXDT +REAL(KIND=JPRB) :: ZDPR +REAL(KIND=JPRB) :: ZDTDP +REAL(KIND=JPRB) :: ZE +REAL(KIND=JPRB) :: ZEPSEC +REAL(KIND=JPRB) :: ZFAC, ZFACI, ZFACW +REAL(KIND=JPRB) :: ZGDCP +REAL(KIND=JPRB) :: ZINEW +REAL(KIND=JPRB) :: ZLCRIT +REAL(KIND=JPRB) :: ZMFDN +REAL(KIND=JPRB) :: ZPRECIP +REAL(KIND=JPRB) :: ZQE +REAL(KIND=JPRB) :: ZQSAT, ZQTMST, ZRDCP +REAL(KIND=JPRB) :: ZRHC, ZSIG, ZSIGK +REAL(KIND=JPRB) :: ZWTOT +REAL(KIND=JPRB) :: ZZCO, ZZDL, ZZRH, ZZZDT, ZQADJ +REAL(KIND=JPRB) :: ZQNEW, ZTNEW +REAL(KIND=JPRB) :: ZRG_R,ZGDPH_R,ZCONS1,ZCOND,ZCONS1A +REAL(KIND=JPRB) :: ZLFINAL +REAL(KIND=JPRB) :: ZMELT +REAL(KIND=JPRB) :: ZEVAP +REAL(KIND=JPRB) :: ZFRZ +REAL(KIND=JPRB) :: ZVPLIQ, ZVPICE +REAL(KIND=JPRB) :: ZADD, ZBDD, ZCVDS, ZICE0, ZDEPOS +REAL(KIND=JPRB) :: ZSUPSAT(KLON) +REAL(KIND=JPRB) :: ZFALL +REAL(KIND=JPRB) :: ZRE_ICE +REAL(KIND=JPRB) :: ZRLDCP +REAL(KIND=JPRB) :: ZQP1ENV + +!---------------------------- +! Arrays for new microphysics +!---------------------------- +INTEGER(KIND=JPIM) :: IPHASE(NCLV) ! marker for water phase of each species + ! 0=vapour, 1=liquid, 2=ice + +INTEGER(KIND=JPIM) :: IMELT(NCLV) ! marks melting linkage for ice categories + ! ice->liquid, snow->rain + +LOGICAL :: LLFALL(NCLV) ! marks falling species + ! LLFALL=0, cloud cover must > 0 for zqx > 0 + ! LLFALL=1, no cloud needed, zqx can evaporate + +LOGICAL :: LLINDEX1(KLON,NCLV) ! index variable +LOGICAL :: LLINDEX3(KLON,NCLV,NCLV) ! index variable +REAL(KIND=JPRB) :: ZMAX +REAL(KIND=JPRB) :: ZRAT +INTEGER(KIND=JPIM) :: IORDER(KLON,NCLV) ! array for sorting explicit terms + +REAL(KIND=JPRB) :: ZLIQFRAC(KLON,KLEV) ! cloud liquid water fraction: ql/(ql+qi) +REAL(KIND=JPRB) :: ZICEFRAC(KLON,KLEV) ! cloud ice water fraction: qi/(ql+qi) +REAL(KIND=JPRB) :: ZQX(KLON,KLEV,NCLV) ! water variables +REAL(KIND=JPRB) :: ZQX0(KLON,KLEV,NCLV) ! water variables at start of scheme +REAL(KIND=JPRB) :: ZQXN(KLON,NCLV) ! new values for zqx at time+1 +REAL(KIND=JPRB) :: ZQXFG(KLON,NCLV) ! first guess values including precip +REAL(KIND=JPRB) :: ZQXNM1(KLON,NCLV) ! new values for zqx at time+1 at level above +REAL(KIND=JPRB) :: ZFLUXQ(KLON,NCLV) ! fluxes convergence of species (needed?) +! Keep the following for possible future total water variance scheme? +!REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature +!REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction +!REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance +!REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) +!REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + +REAL(KIND=JPRB) :: ZPFPLSX(KLON,KLEV+1,NCLV) ! generalized precipitation flux +REAL(KIND=JPRB) :: ZLNEG(KLON,KLEV,NCLV) ! for negative correction diagnostics +REAL(KIND=JPRB) :: ZMELTMAX(KLON) +REAL(KIND=JPRB) :: ZFRZMAX(KLON) +REAL(KIND=JPRB) :: ZICETOT(KLON) + +REAL(KIND=JPRB) :: ZQXN2D(KLON,KLEV,NCLV) ! water variables store + +REAL(KIND=JPRB) :: ZQSMIX(KLON,KLEV) ! diagnostic mixed phase saturation +!REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation +REAL(KIND=JPRB) :: ZQSLIQ(KLON,KLEV) ! liquid water saturation +REAL(KIND=JPRB) :: ZQSICE(KLON,KLEV) ! ice water saturation + +!REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH +!REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq +!REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + +REAL(KIND=JPRB) :: ZFOEEWMT(KLON,KLEV) +REAL(KIND=JPRB) :: ZFOEEW(KLON,KLEV) +REAL(KIND=JPRB) :: ZFOEELIQT(KLON,KLEV) +!REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + +REAL(KIND=JPRB) :: ZDQSLIQDT(KLON), ZDQSICEDT(KLON), ZDQSMIXDT(KLON) +REAL(KIND=JPRB) :: ZCORQSLIQ(KLON) +REAL(KIND=JPRB) :: ZCORQSICE(KLON) +!REAL(KIND=JPRB) :: ZCORQSBIN(KLON) +REAL(KIND=JPRB) :: ZCORQSMIX(KLON) +REAL(KIND=JPRB) :: ZEVAPLIMLIQ(KLON), ZEVAPLIMICE(KLON), ZEVAPLIMMIX(KLON) + +!------------------------------------------------------- +! SOURCE/SINK array for implicit and explicit terms +!------------------------------------------------------- +! a POSITIVE value entered into the arrays is a... +! Source of this variable +! | +! | Sink of this variable +! | | +! V V +! ZSOLQA(JL,IQa,IQb) = explicit terms +! ZSOLQB(JL,IQa,IQb) = implicit terms +! Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is +! a source of NCLDQL and a sink of IQV +! put 'magic' source terms such as PLUDE from +! detrainment into explicit source/sink array diagnognal +! ZSOLQA(NCLDQL,NCLDQL)= -PLUDE +! i.e. A positive value is a sink!????? weird... +!------------------------------------------------------- + +REAL(KIND=JPRB) :: ZSOLQA(KLON,NCLV,NCLV) ! explicit sources and sinks +REAL(KIND=JPRB) :: ZSOLQB(KLON,NCLV,NCLV) ! implicit sources and sinks + ! e.g. microphysical pathways between ice variables. +REAL(KIND=JPRB) :: ZQLHS(KLON,NCLV,NCLV) ! n x n matrix storing the LHS of implicit solver +REAL(KIND=JPRB) :: ZVQX(NCLV) ! fall speeds of three categories +REAL(KIND=JPRB) :: ZEXPLICIT, ZRATIO(KLON,NCLV), ZSINKSUM(KLON,NCLV) + +! for sedimentation source/sink terms +REAL(KIND=JPRB) :: ZFALLSINK(KLON,NCLV) +REAL(KIND=JPRB) :: ZFALLSRCE(KLON,NCLV) + +! for convection detrainment source and subsidence source/sink terms +REAL(KIND=JPRB) :: ZCONVSRCE(KLON,NCLV) +REAL(KIND=JPRB) :: ZCONVSINK(KLON,NCLV) + +! for supersaturation source term from previous timestep +REAL(KIND=JPRB) :: ZPSUPSATSRCE(KLON,NCLV) + +! Numerical fit to wet bulb temperature +REAL(KIND=JPRB),PARAMETER :: ZTW1 = 1329.31_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW2 = 0.0074615_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW3 = 0.85E5_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW4 = 40.637_JPRB +REAL(KIND=JPRB),PARAMETER :: ZTW5 = 275.0_JPRB + +REAL(KIND=JPRB) :: ZSUBSAT ! Subsaturation for snow melting term +REAL(KIND=JPRB) :: ZTDMTW0 ! Diff between dry-bulb temperature and + ! temperature when wet-bulb = 0degC + +! Variables for deposition term +REAL(KIND=JPRB) :: ZTCG ! Temperature dependent function for ice PSD +REAL(KIND=JPRB) :: ZFACX1I, ZFACX1S! PSD correction factor +REAL(KIND=JPRB) :: ZAPLUSB,ZCORRFAC,ZCORRFAC2,ZPR02,ZTERM1,ZTERM2 ! for ice dep +REAL(KIND=JPRB) :: ZCLDTOPDIST(KLON) ! Distance from cloud top +REAL(KIND=JPRB) :: ZINFACTOR ! No. of ice nuclei factor for deposition + +! Autoconversion/accretion/riming/evaporation +INTEGER(KIND=JPIM) :: IWARMRAIN +INTEGER(KIND=JPIM) :: IEVAPRAIN +INTEGER(KIND=JPIM) :: IEVAPSNOW +INTEGER(KIND=JPIM) :: IDEPICE +REAL(KIND=JPRB) :: ZRAINACC(KLON) +REAL(KIND=JPRB) :: ZRAINCLD(KLON) +REAL(KIND=JPRB) :: ZSNOWRIME(KLON) +REAL(KIND=JPRB) :: ZSNOWCLD(KLON) +REAL(KIND=JPRB) :: ZESATLIQ +REAL(KIND=JPRB) :: ZFALLCORR +REAL(KIND=JPRB) :: ZLAMBDA +REAL(KIND=JPRB) :: ZEVAP_DENOM +REAL(KIND=JPRB) :: ZCORR2 +REAL(KIND=JPRB) :: ZKA +REAL(KIND=JPRB) :: ZCONST +REAL(KIND=JPRB) :: ZTEMP + +! Rain freezing +LOGICAL :: LLRAINLIQ(KLON) ! True if majority of raindrops are liquid (no ice core) + +!---------------------------- +! End: new microphysics +!---------------------------- + +!---------------------- +! SCM budget statistics +!---------------------- +REAL(KIND=JPRB) :: ZRAIN + +REAL(KIND=JPRB) :: ZHOOK_HANDLE +REAL(KIND=JPRB) :: ZTMPL,ZTMPI,ZTMPA + +REAL(KIND=JPRB) :: ZMM,ZRR +REAL(KIND=JPRB) :: ZRG(KLON) + +REAL(KIND=JPRB) :: ZZSUM, ZZRATIO +REAL(KIND=JPRB) :: ZEPSILON + +REAL(KIND=JPRB) :: ZCOND1, ZQP + +REAL(KIND=JPRB) :: PSUM_SOLQA(KLON) + +! #include "fcttre.func.h" +! #include "fccld.func.h" + +#include "fcttre.ycst.h" +#include "fccld.ydthf.h" + +!=============================================================================== +!IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) +ASSOCIATE(LAERICEAUTO=>YRECLDP%LAERICEAUTO, LAERICESED=>YRECLDP%LAERICESED, & + & LAERLIQAUTOLSP=>YRECLDP%LAERLIQAUTOLSP, LAERLIQCOLL=>YRECLDP%LAERLIQCOLL, & + & LCLDBUDGET=>YRECLDP%LCLDBUDGET, NCLDTOP=>YRECLDP%NCLDTOP, & + & NSSOPT=>YRECLDP%NSSOPT, RAMID=>YRECLDP%RAMID, RAMIN=>YRECLDP%RAMIN, & + & RCCN=>YRECLDP%RCCN, RCLCRIT_LAND=>YRECLDP%RCLCRIT_LAND, & + & RCLCRIT_SEA=>YRECLDP%RCLCRIT_SEA, RCLDIFF=>YRECLDP%RCLDIFF, & + & RCLDIFF_CONVI=>YRECLDP%RCLDIFF_CONVI, RCLDTOPCF=>YRECLDP%RCLDTOPCF, & + & RCL_APB1=>YRECLDP%RCL_APB1, RCL_APB2=>YRECLDP%RCL_APB2, & + & RCL_APB3=>YRECLDP%RCL_APB3, RCL_CDENOM1=>YRECLDP%RCL_CDENOM1, & + & RCL_CDENOM2=>YRECLDP%RCL_CDENOM2, RCL_CDENOM3=>YRECLDP%RCL_CDENOM3, & + & RCL_CONST1I=>YRECLDP%RCL_CONST1I, RCL_CONST1R=>YRECLDP%RCL_CONST1R, & + & RCL_CONST1S=>YRECLDP%RCL_CONST1S, RCL_CONST2I=>YRECLDP%RCL_CONST2I, & + & RCL_CONST2R=>YRECLDP%RCL_CONST2R, RCL_CONST2S=>YRECLDP%RCL_CONST2S, & + & RCL_CONST3I=>YRECLDP%RCL_CONST3I, RCL_CONST3R=>YRECLDP%RCL_CONST3R, & + & RCL_CONST3S=>YRECLDP%RCL_CONST3S, RCL_CONST4I=>YRECLDP%RCL_CONST4I, & + & RCL_CONST4R=>YRECLDP%RCL_CONST4R, RCL_CONST4S=>YRECLDP%RCL_CONST4S, & + & RCL_CONST5I=>YRECLDP%RCL_CONST5I, RCL_CONST5R=>YRECLDP%RCL_CONST5R, & + & RCL_CONST5S=>YRECLDP%RCL_CONST5S, RCL_CONST6I=>YRECLDP%RCL_CONST6I, & + & RCL_CONST6R=>YRECLDP%RCL_CONST6R, RCL_CONST6S=>YRECLDP%RCL_CONST6S, & + & RCL_CONST7S=>YRECLDP%RCL_CONST7S, RCL_CONST8S=>YRECLDP%RCL_CONST8S, & + & RCL_FAC1=>YRECLDP%RCL_FAC1, RCL_FAC2=>YRECLDP%RCL_FAC2, & + & RCL_FZRAB=>YRECLDP%RCL_FZRAB, RCL_KA273=>YRECLDP%RCL_KA273, & + & RCL_KKAAC=>YRECLDP%RCL_KKAAC, RCL_KKAAU=>YRECLDP%RCL_KKAAU, & + & RCL_KKBAC=>YRECLDP%RCL_KKBAC, RCL_KKBAUN=>YRECLDP%RCL_KKBAUN, & + & RCL_KKBAUQ=>YRECLDP%RCL_KKBAUQ, & + & RCL_KK_CLOUD_NUM_LAND=>YRECLDP%RCL_KK_CLOUD_NUM_LAND, & + & RCL_KK_CLOUD_NUM_SEA=>YRECLDP%RCL_KK_CLOUD_NUM_SEA, RCL_X3I=>YRECLDP%RCL_X3I, & + & RCOVPMIN=>YRECLDP%RCOVPMIN, RDENSREF=>YRECLDP%RDENSREF, & + & RDEPLIQREFDEPTH=>YRECLDP%RDEPLIQREFDEPTH, & + & RDEPLIQREFRATE=>YRECLDP%RDEPLIQREFRATE, RICEHI1=>YRECLDP%RICEHI1, & + & RICEHI2=>YRECLDP%RICEHI2, RICEINIT=>YRECLDP%RICEINIT, RKCONV=>YRECLDP%RKCONV, & + & RKOOPTAU=>YRECLDP%RKOOPTAU, RLCRITSNOW=>YRECLDP%RLCRITSNOW, & + & RLMIN=>YRECLDP%RLMIN, RNICE=>YRECLDP%RNICE, RPECONS=>YRECLDP%RPECONS, & + & RPRC1=>YRECLDP%RPRC1, RPRECRHMAX=>YRECLDP%RPRECRHMAX, & + & RSNOWLIN1=>YRECLDP%RSNOWLIN1, RSNOWLIN2=>YRECLDP%RSNOWLIN2, & + & RTAUMEL=>YRECLDP%RTAUMEL, RTHOMO=>YRECLDP%RTHOMO, RVICE=>YRECLDP%RVICE, & + & RVRAIN=>YRECLDP%RVRAIN, RVRFACTOR=>YRECLDP%RVRFACTOR, & + & RVSNOW=>YRECLDP%RVSNOW, & + & RG=>YDCST%RG, RD=>YDCST%RD, RCPD=>YDCST%RCPD, RETV=>YDCST%RETV, & + & RLVTT=>YDCST%RLVTT, RLSTT=>YDCST%RLSTT, RLMLT=>YDCST%RLMLT, & + & RTT=>YDCST%RTT, RV=>YDCST%RV, & + & R2ES=>YDTHF%R2ES, R3LES=>YDTHF%R3LES, R3IES=>YDTHF%R3IES, & + & R4LES=>YDTHF%R4LES, R4IES=>YDTHF%R4IES, R5LES=>YDTHF%R5LES, & + & R5IES=>YDTHF%R5IES, R5ALVCP=>YDTHF%R5ALVCP, R5ALSCP=>YDTHF%R5ALSCP, & + & RALVDCP=>YDTHF%RALVDCP, RALSDCP=>YDTHF%RALSDCP, & + & RALFDCP=>YDTHF%RALFDCP, RTWAT=>YDTHF%RTWAT, RTICE=>YDTHF%RTICE, & + & RTICECU=>YDTHF%RTICECU, RTWAT_RTICE_R=>YDTHF%RTWAT_RTICE_R, & + & RTWAT_RTICECU_R=>YDTHF%RTWAT_RTICECU_R, RKOOP1=>YDTHF%RKOOP1, & + & RKOOP2=>YDTHF%RKOOP2 & + & ) + +! YDCST, YDTHF + + + + + +!=============================================================================== +! 0.0 Beginning of timestep book-keeping +!---------------------------------------------------------------------- + + +!###################################################################### +! 0. *** SET UP CONSTANTS *** +!###################################################################### + +! ZEPSILON=100._JPRB*EPSILON(ZEPSILON) +ZEPSILON=1.E-14_JPRB + +! --------------------------------------------------------------------- +! Set version of warm-rain autoconversion/accretion +! IWARMRAIN = 1 ! Sundquist +! IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) +! --------------------------------------------------------------------- +IWARMRAIN = 2 +! --------------------------------------------------------------------- +! Set version of rain evaporation +! IEVAPRAIN = 1 ! Sundquist +! IEVAPRAIN = 2 ! Abel and Boutle (2013) +! --------------------------------------------------------------------- +IEVAPRAIN = 2 +! --------------------------------------------------------------------- +! Set version of snow evaporation +! IEVAPSNOW = 1 ! Sundquist +! IEVAPSNOW = 2 ! New +! --------------------------------------------------------------------- +IEVAPSNOW = 1 +! --------------------------------------------------------------------- +! Set version of ice deposition +! IDEPICE = 1 ! Rotstayn (2001) +! IDEPICE = 2 ! New +! --------------------------------------------------------------------- +IDEPICE = 1 + +! --------------------- +! Some simple constants +! --------------------- +ZQTMST = 1.0_JPRB/PTSPHY +ZGDCP = RG/RCPD +ZRDCP = RD/RCPD +ZCONS1A = RCPD/(RLMLT*RG*RTAUMEL) +ZEPSEC = 1.E-14_JPRB +ZRG_R = 1.0_JPRB/RG +ZRLDCP = 1.0_JPRB/(RALSDCP-RALVDCP) + +! Note: Defined in module/yoecldp.F90 +! NCLDQL=1 ! liquid cloud water +! NCLDQI=2 ! ice cloud water +! NCLDQR=3 ! rain water +! NCLDQS=4 ! snow +! NCLDQV=5 ! vapour + +! ----------------------------------------------- +! Define species phase, 0=vapour, 1=liquid, 2=ice +! ----------------------------------------------- +IPHASE(NCLDQV)=0 +IPHASE(NCLDQL)=1 +IPHASE(NCLDQR)=1 +IPHASE(NCLDQI)=2 +IPHASE(NCLDQS)=2 + +! --------------------------------------------------- +! Set up melting/freezing index, +! if an ice category melts/freezes, where does it go? +! --------------------------------------------------- +IMELT(NCLDQV)=-99 +IMELT(NCLDQL)=NCLDQI +IMELT(NCLDQR)=NCLDQS +IMELT(NCLDQI)=NCLDQR +IMELT(NCLDQS)=NCLDQR + +! ----------------------------------------------- +! INITIALIZATION OF OUTPUT TENDENCIES +! ----------------------------------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + TENDENCY_LOC_T(JL,JK)=0.0_JPRB + TENDENCY_LOC_Q(JL,JK)=0.0_JPRB + TENDENCY_LOC_A(JL,JK)=0.0_JPRB + ENDDO +ENDDO +DO JM=1,NCLV-1 + DO JK=1,KLEV + DO JL=KIDIA,KFDIA + TENDENCY_LOC_CLD(JL,JK,JM)=0.0_JPRB + ENDDO + ENDDO +ENDDO + +!-- These were uninitialized : meaningful only when we compare error differences +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + PCOVPTOT(JL,JK) = 0.0_JPRB + TENDENCY_LOC_CLD(JL,JK,NCLV) = 0.0_JPRB + ENDDO +ENDDO + +! ------------------------- +! set up fall speeds in m/s +! ------------------------- +ZVQX(NCLDQV)=0.0_JPRB +ZVQX(NCLDQL)=0.0_JPRB +ZVQX(NCLDQI)=RVICE +ZVQX(NCLDQR)=RVRAIN +ZVQX(NCLDQS)=RVSNOW +LLFALL(:)=.FALSE. +DO JM=1,NCLV + IF (ZVQX(JM)>0.0_JPRB) LLFALL(JM)=.TRUE. ! falling species +ENDDO +! Set LLFALL to false for ice (but ice still sediments!) +! Need to rationalise this at some point +LLFALL(NCLDQI)=.FALSE. + + +!###################################################################### +! 1. *** INITIAL VALUES FOR VARIABLES *** +!###################################################################### + + +! ---------------------- +! non CLV initialization +! ---------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + ZTP1(JL,JK) = PT(JL,JK)+PTSPHY*TENDENCY_TMP_T(JL,JK) + ZQX(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*TENDENCY_TMP_Q(JL,JK) + ZQX0(JL,JK,NCLDQV) = PQ(JL,JK)+PTSPHY*TENDENCY_TMP_Q(JL,JK) + ZA(JL,JK) = PA(JL,JK)+PTSPHY*TENDENCY_TMP_A(JL,JK) + ZAORIG(JL,JK) = PA(JL,JK)+PTSPHY*TENDENCY_TMP_A(JL,JK) + ENDDO +ENDDO + +! ------------------------------------- +! initialization for CLV family +! ------------------------------------- +DO JM=1,NCLV-1 + DO JK=1,KLEV + DO JL=KIDIA,KFDIA + ZQX(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*TENDENCY_TMP_CLD(JL,JK,JM) + ZQX0(JL,JK,JM) = PCLV(JL,JK,JM)+PTSPHY*TENDENCY_TMP_CLD(JL,JK,JM) + ENDDO + ENDDO +ENDDO + +!------------- +! zero arrays +!------------- +DO JM=1,NCLV + DO JK=1,KLEV+1 + DO JL=KIDIA,KFDIA + ZPFPLSX(JL,JK,JM) = 0.0_JPRB ! precip fluxes + ENDDO + ENDDO +ENDDO + +DO JM=1,NCLV + DO JK=1,KLEV + DO JL=KIDIA,KFDIA + ZQXN2D(JL,JK,JM) = 0.0_JPRB ! end of timestep values in 2D + ZLNEG(JL,JK,JM) = 0.0_JPRB ! negative input check + ENDDO + ENDDO +ENDDO + +DO JL=KIDIA,KFDIA + PRAINFRAC_TOPRFZ(JL) =0.0_JPRB ! rain fraction at top of refreezing layer +ENDDO +LLRAINLIQ(:) = .TRUE. ! Assume all raindrops are liquid initially + +! ---------------------------------------------------- +! Tidy up very small cloud cover or total cloud water +! ---------------------------------------------------- +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + IF (ZQX(JL,JK,NCLDQL)+ZQX(JL,JK,NCLDQI)273K + !--------------------------------------------- + ZALFA=FOEDELTA(ZTP1(JL,JK)) + ZFOEEW(JL,JK)=MIN((ZALFA*FOEELIQ(ZTP1(JL,JK))+ & + & (1.0_JPRB-ZALFA)*FOEEICE(ZTP1(JL,JK)))/PAP(JL,JK),0.5_JPRB) + ZFOEEW(JL,JK)=MIN(0.5_JPRB,ZFOEEW(JL,JK)) + ZQSICE(JL,JK)=ZFOEEW(JL,JK)/(1.0_JPRB-RETV*ZFOEEW(JL,JK)) + + !---------------------------------- + ! liquid water saturation + !---------------------------------- + ZFOEELIQT(JL,JK)=MIN(FOEELIQ(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + ZQSLIQ(JL,JK)=ZFOEELIQT(JL,JK) + ZQSLIQ(JL,JK)=ZQSLIQ(JL,JK)/(1.0_JPRB-RETV*ZQSLIQ(JL,JK)) + +! !---------------------------------- +! ! ice water saturation +! !---------------------------------- +! ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) +! ZQSICE(JL,JK)=ZFOEEICET(JL,JK) +! ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + ENDDO + +ENDDO + +DO JK=1,KLEV + DO JL=KIDIA,KFDIA + + + !------------------------------------------ + ! Ensure cloud fraction is between 0 and 1 + !------------------------------------------ + ZA(JL,JK)=MAX(0.0_JPRB,MIN(1.0_JPRB,ZA(JL,JK))) + + !------------------------------------------------------------------- + ! Calculate liq/ice fractions (no longer a diagnostic relationship) + !------------------------------------------------------------------- + ZLI(JL,JK)=ZQX(JL,JK,NCLDQL)+ZQX(JL,JK,NCLDQI) + IF (ZLI(JL,JK)>RLMIN) THEN + ZLIQFRAC(JL,JK)=ZQX(JL,JK,NCLDQL)/ZLI(JL,JK) + ZICEFRAC(JL,JK)=1.0_JPRB-ZLIQFRAC(JL,JK) + ELSE + ZLIQFRAC(JL,JK)=0.0_JPRB + ZICEFRAC(JL,JK)=0.0_JPRB + ENDIF + + ENDDO +ENDDO + +!###################################################################### +! 2. *** CONSTANTS AND PARAMETERS *** +!###################################################################### +! Calculate L in updrafts of bl-clouds +! Specify QS, P/PS for tropopause (for c2) +! And initialize variables +!------------------------------------------ + +!--------------------------------- +! Find tropopause level (ZTRPAUS) +!--------------------------------- +DO JL=KIDIA,KFDIA + ZTRPAUS(JL)=0.1_JPRB + ZPAPHD(JL)=1.0_JPRB/PAPH(JL,KLEV+1) +ENDDO +DO JK=1,KLEV-1 + DO JL=KIDIA,KFDIA + ZSIG=PAP(JL,JK)*ZPAPHD(JL) + IF (ZSIG>0.1_JPRB.AND.ZSIG<0.4_JPRB.AND.ZTP1(JL,JK)>ZTP1(JL,JK+1)) THEN + ZTRPAUS(JL)=ZSIG + ENDIF + ENDDO +ENDDO + +!----------------------------- +! Reset single level variables +!----------------------------- + +DO JL=KIDIA,KFDIA +ZANEWM1(JL) = 0.0_JPRB +ZDA(JL) = 0.0_JPRB +ZCOVPCLR(JL) = 0.0_JPRB +ZCOVPMAX(JL) = 0.0_JPRB +ZCOVPTOT(JL) = 0.0_JPRB +ZCLDTOPDIST(JL) = 0.0_JPRB +ENDDO + +!###################################################################### +! 3. *** PHYSICS *** +!###################################################################### + + +!---------------------------------------------------------------------- +! START OF VERTICAL LOOP +!---------------------------------------------------------------------- + +DO JK=NCLDTOP,KLEV + +!---------------------------------------------------------------------- +! 3.0 INITIALIZE VARIABLES +!---------------------------------------------------------------------- + + !--------------------------------- + ! First guess microphysics + !--------------------------------- + DO JM=1,NCLV + DO JL=KIDIA,KFDIA + ZQXFG(JL,JM)=ZQX(JL,JK,JM) + ENDDO + ENDDO + + !--------------------------------- + ! Set KLON arrays to zero + !--------------------------------- + + DO JL=KIDIA,KFDIA + ZLICLD(JL) = 0.0_JPRB + ZRAINAUT(JL) = 0.0_JPRB ! currently needed for diags + ZRAINACC(JL) = 0.0_JPRB ! currently needed for diags + ZSNOWAUT(JL) = 0.0_JPRB ! needed + ZLDEFR(JL) = 0.0_JPRB + ZACUST(JL) = 0.0_JPRB ! set later when needed + ZQPRETOT(JL) = 0.0_JPRB + ZLFINALSUM(JL)= 0.0_JPRB + + ! Required for first guess call + ZLCOND1(JL) = 0.0_JPRB + ZLCOND2(JL) = 0.0_JPRB + ZSUPSAT(JL) = 0.0_JPRB + ZLEVAPL(JL) = 0.0_JPRB + ZLEVAPI(JL) = 0.0_JPRB + + !------------------------------------- + ! solvers for cloud fraction + !------------------------------------- + ZSOLAB(JL) = 0.0_JPRB + ZSOLAC(JL) = 0.0_JPRB + + ZICETOT(JL) = 0.0_JPRB + ENDDO + + !------------------------------------------ + ! reset matrix so missing pathways are set + !------------------------------------------ + DO JM=1,NCLV + DO JN=1,NCLV + DO JL=KIDIA,KFDIA + ZSOLQB(JL,JN,JM) = 0.0_JPRB + ZSOLQA(JL,JN,JM) = 0.0_JPRB + ENDDO + ENDDO + ENDDO + + !---------------------------------- + ! reset new microphysics variables + !---------------------------------- + DO JM=1,NCLV + DO JL=KIDIA,KFDIA + ZFALLSRCE(JL,JM) = 0.0_JPRB + ZFALLSINK(JL,JM) = 0.0_JPRB + ZCONVSRCE(JL,JM) = 0.0_JPRB + ZCONVSINK(JL,JM) = 0.0_JPRB + ZPSUPSATSRCE(JL,JM) = 0.0_JPRB + ZRATIO(JL,JM) = 0.0_JPRB + ENDDO + ENDDO + + DO JL=KIDIA,KFDIA + + !------------------------- + ! derived variables needed + !------------------------- + + ZDP(JL) = PAPH(JL,JK+1)-PAPH(JL,JK) ! dp + ZGDP(JL) = RG/ZDP(JL) ! g/dp + ZRHO(JL) = PAP(JL,JK)/(RD*ZTP1(JL,JK)) ! p/RT air density + + ZDTGDP(JL) = PTSPHY*ZGDP(JL) ! dt g/dp + ZRDTGDP(JL) = ZDP(JL)*(1.0_JPRB/(PTSPHY*RG)) ! 1/(dt g/dp) + + IF (JK>1) ZDTGDPF(JL) = PTSPHY*RG/(PAP(JL,JK)-PAP(JL,JK-1)) + + !------------------------------------ + ! Calculate dqs/dT correction factor + !------------------------------------ + ! Reminder: RETV=RV/RD-1 + + ! liquid + ZFACW = R5LES/((ZTP1(JL,JK)-R4LES)**2) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEELIQT(JL,JK)) + ZDQSLIQDT(JL) = ZFACW*ZCOR*ZQSLIQ(JL,JK) + ZCORQSLIQ(JL) = 1.0_JPRB+RALVDCP*ZDQSLIQDT(JL) + + ! ice + ZFACI = R5IES/((ZTP1(JL,JK)-R4IES)**2) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEEW(JL,JK)) + ZDQSICEDT(JL) = ZFACI*ZCOR*ZQSICE(JL,JK) + ZCORQSICE(JL) = 1.0_JPRB+RALSDCP*ZDQSICEDT(JL) + + ! diagnostic mixed + ZALFAW = ZFOEALFA(JL,JK) + ZALFAWM(JL) = ZALFAW + ZFAC = ZALFAW*ZFACW+(1.0_JPRB-ZALFAW)*ZFACI + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV*ZFOEEWMT(JL,JK)) + ZDQSMIXDT(JL) = ZFAC*ZCOR*ZQSMIX(JL,JK) + ZCORQSMIX(JL) = 1.0_JPRB+FOELDCPM(ZTP1(JL,JK))*ZDQSMIXDT(JL) + + ! evaporation/sublimation limits + ZEVAPLIMMIX(JL) = MAX((ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSMIX(JL),0.0_JPRB) + ZEVAPLIMLIQ(JL) = MAX((ZQSLIQ(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSLIQ(JL),0.0_JPRB) + ZEVAPLIMICE(JL) = MAX((ZQSICE(JL,JK)-ZQX(JL,JK,NCLDQV))/ZCORQSICE(JL),0.0_JPRB) + + !-------------------------------- + ! in-cloud consensate amount + !-------------------------------- + ZTMPA = 1.0_JPRB/MAX(ZA(JL,JK),ZEPSEC) + ZLIQCLD(JL) = ZQX(JL,JK,NCLDQL)*ZTMPA + ZICECLD(JL) = ZQX(JL,JK,NCLDQI)*ZTMPA + ZLICLD(JL) = ZLIQCLD(JL)+ZICECLD(JL) + + ENDDO + + !------------------------------------------------ + ! Evaporate very small amounts of liquid and ice + !------------------------------------------------ + DO JL=KIDIA,KFDIA + + IF (ZQX(JL,JK,NCLDQL) < RLMIN) THEN + ZSOLQA(JL,NCLDQV,NCLDQL) = ZQX(JL,JK,NCLDQL) + ZSOLQA(JL,NCLDQL,NCLDQV) = -ZQX(JL,JK,NCLDQL) + ENDIF + + IF (ZQX(JL,JK,NCLDQI) < RLMIN) THEN + ZSOLQA(JL,NCLDQV,NCLDQI) = ZQX(JL,JK,NCLDQI) + ZSOLQA(JL,NCLDQI,NCLDQV) = -ZQX(JL,JK,NCLDQI) + ENDIF + + ENDDO + + !--------------------------------------------------------------------- + ! 3.1 ICE SUPERSATURATION ADJUSTMENT + !--------------------------------------------------------------------- + ! Note that the supersaturation adjustment is made with respect to + ! liquid saturation: when T>0C + ! ice saturation: when T<0C + ! with an adjustment made to allow for ice + ! supersaturation in the clear sky + ! Note also that the KOOP factor automatically clips the supersaturation + ! to a maximum set by the liquid water saturation mixing ratio + ! important for temperatures near to but below 0C + !----------------------------------------------------------------------- + +!DIR$ NOFUSION + DO JL=KIDIA,KFDIA + + !----------------------------------- + ! 3.1.1 Supersaturation limit (from Koop) + !----------------------------------- + ! Needs to be set for all temperatures + ZFOKOOP(JL)=FOKOOP(ZTP1(JL,JK)) + ENDDO + DO JL=KIDIA,KFDIA + + IF (ZTP1(JL,JK)>=RTT .OR. NSSOPT==0) THEN + ZFAC = 1.0_JPRB + ZFACI = 1.0_JPRB + ELSE + ZFAC = ZA(JL,JK)+ZFOKOOP(JL)*(1.0_JPRB-ZA(JL,JK)) + ZFACI = PTSPHY/RKOOPTAU + ENDIF + + !------------------------------------------------------------------- + ! 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + ! correction factor + ! [#Note: QSICE or QSLIQ] + !------------------------------------------------------------------- + + ! Calculate supersaturation to add to cloud + IF (ZA(JL,JK) > 1.0_JPRB-RAMIN) THEN + ZSUPSAT(JL) = MAX((ZQX(JL,JK,NCLDQV)-ZFAC*ZQSICE(JL,JK))/ZCORQSICE(JL)& + & ,0.0_JPRB) + ELSE + ! Calculate environmental humidity supersaturation + ZQP1ENV = (ZQX(JL,JK,NCLDQV) - ZA(JL,JK)*ZQSICE(JL,JK))/ & + & MAX(1.0_JPRB-ZA(JL,JK),ZEPSILON) + !& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + ZSUPSAT(JL) = MAX((1.0_JPRB-ZA(JL,JK))*(ZQP1ENV-ZFAC*ZQSICE(JL,JK))& + & /ZCORQSICE(JL),0.0_JPRB) + ENDIF + + !------------------------------------------------------------------- + ! Here the supersaturation is turned into liquid water + ! However, if the temperature is below the threshold for homogeneous + ! freezing then the supersaturation is turned instantly to ice. + !-------------------------------------------------------------------- + + IF (ZSUPSAT(JL) > ZEPSEC) THEN + + IF (ZTP1(JL,JK) > RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)+ZSUPSAT(JL) + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)-ZSUPSAT(JL) + ! Include liquid in first guess + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZSUPSAT(JL) + ELSE + ! Turn supersaturation into ice water + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)+ZSUPSAT(JL) + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)-ZSUPSAT(JL) + ! Add ice to first guess for deposition term + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZSUPSAT(JL) + ENDIF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC(JL) = (1.0_JPRB-ZA(JL,JK))*ZFACI + + ENDIF + + !------------------------------------------------------- + ! 3.1.3 Include supersaturation from previous timestep + ! (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + !------------------------------------------------------- + IF (PSUPSAT(JL,JK)>ZEPSEC) THEN + IF (ZTP1(JL,JK) > RTHOMO) THEN + ! Turn supersaturation into liquid water + ZSOLQA(JL,NCLDQL,NCLDQL) = ZSOLQA(JL,NCLDQL,NCLDQL)+PSUPSAT(JL,JK) + ZPSUPSATSRCE(JL,NCLDQL) = PSUPSAT(JL,JK) + ! Add liquid to first guess for deposition term + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+PSUPSAT(JL,JK) + ! Store cloud budget diagnostics if required + ELSE + ! Turn supersaturation into ice water + ZSOLQA(JL,NCLDQI,NCLDQI) = ZSOLQA(JL,NCLDQI,NCLDQI)+PSUPSAT(JL,JK) + ZPSUPSATSRCE(JL,NCLDQI) = PSUPSAT(JL,JK) + ! Add ice to first guess for deposition term + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+PSUPSAT(JL,JK) + ! Store cloud budget diagnostics if required + ENDIF + + ! Increase cloud amount using RKOOPTAU timescale + ZSOLAC(JL)=(1.0_JPRB-ZA(JL,JK))*ZFACI + ! Store cloud budget diagnostics if required + ENDIF + + ENDDO ! on JL + + !--------------------------------------------------------------------- + ! 3.2 DETRAINMENT FROM CONVECTION + !--------------------------------------------------------------------- + ! * Diagnostic T-ice/liq split retained for convection + ! Note: This link is now flexible and a future convection + ! scheme can detrain explicit seperate budgets of: + ! cloud water, ice, rain and snow + ! * There is no (1-ZA) multiplier term on the cloud detrainment + ! term, since is now written in mass-flux terms + ! [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + !--------------------------------------------------------------------- + IF (JK < KLEV .AND. JK>=NCLDTOP) THEN + + DO JL=KIDIA,KFDIA + + PLUDE(JL,JK)=PLUDE(JL,JK)*ZDTGDP(JL) + + IF(LDCUM(JL).AND.PLUDE(JL,JK) > RLMIN.AND.PLU(JL,JK+1)> ZEPSEC) THEN + + ZSOLAC(JL)=ZSOLAC(JL)+PLUDE(JL,JK)/PLU(JL,JK+1) + ! *diagnostic temperature split* + ZALFAW = ZFOEALFA(JL,JK) + ZCONVSRCE(JL,NCLDQL) = ZALFAW*PLUDE(JL,JK) + ZCONVSRCE(JL,NCLDQI) = (1.0_JPRB-ZALFAW)*PLUDE(JL,JK) + ZSOLQA(JL,NCLDQL,NCLDQL) = ZSOLQA(JL,NCLDQL,NCLDQL)+ZCONVSRCE(JL,NCLDQL) + ZSOLQA(JL,NCLDQI,NCLDQI) = ZSOLQA(JL,NCLDQI,NCLDQI)+ZCONVSRCE(JL,NCLDQI) + + ELSE + + PLUDE(JL,JK)=0.0_JPRB + + ENDIF + ! *convective snow detrainment source + IF (LDCUM(JL)) ZSOLQA(JL,NCLDQS,NCLDQS) = ZSOLQA(JL,NCLDQS,NCLDQS) + PSNDE(JL,JK)*ZDTGDP(JL) + + ENDDO + + ENDIF ! JK NCLDTOP) THEN + + DO JL=KIDIA,KFDIA + ZMF(JL)=MAX(0.0_JPRB,(PMFU(JL,JK)+PMFD(JL,JK))*ZDTGDP(JL)) + ZACUST(JL)=ZMF(JL)*ZANEWM1(JL) + ENDDO + + DO JM=1,NCLV + IF (.NOT.LLFALL(JM).AND.IPHASE(JM)>0) THEN + DO JL=KIDIA,KFDIA + ZLCUST(JL,JM)=ZMF(JL)*ZQXNM1(JL,JM) + ! record total flux for enthalpy budget: + ZCONVSRCE(JL,JM)=ZCONVSRCE(JL,JM)+ZLCUST(JL,JM) + ENDDO + ENDIF + ENDDO + + ! Now have to work out how much liquid evaporates at arrival point + ! since there is no prognostic memory for in-cloud humidity, i.e. + ! we always assume cloud is saturated. + + DO JL=KIDIA,KFDIA + ZDTDP=ZRDCP*0.5_JPRB*(ZTP1(JL,JK-1)+ZTP1(JL,JK))/PAPH(JL,JK) + ZDTFORC = ZDTDP*(PAP(JL,JK)-PAP(JL,JK-1)) + ![#Note: Diagnostic mixed phase should be replaced below] + ZDQS(JL)=ZANEWM1(JL)*ZDTFORC*ZDQSMIXDT(JL) + ENDDO + + DO JM=1,NCLV + IF (.NOT.LLFALL(JM).AND.IPHASE(JM)>0) THEN + DO JL=KIDIA,KFDIA + ZLFINAL=MAX(0.0_JPRB,ZLCUST(JL,JM)-ZDQS(JL)) !lim to zero + ! no supersaturation allowed incloud ---V + ZEVAP=MIN((ZLCUST(JL,JM)-ZLFINAL),ZEVAPLIMMIX(JL)) +! ZEVAP=0.0_JPRB + ZLFINAL=ZLCUST(JL,JM)-ZEVAP + ZLFINALSUM(JL)=ZLFINALSUM(JL)+ZLFINAL ! sum + + ZSOLQA(JL,JM,JM) = ZSOLQA(JL,JM,JM)+ZLCUST(JL,JM) ! whole sum + ZSOLQA(JL,NCLDQV,JM) = ZSOLQA(JL,NCLDQV,JM)+ZEVAP + ZSOLQA(JL,JM,NCLDQV) = ZSOLQA(JL,JM,NCLDQV)-ZEVAP + ENDDO + ENDIF + ENDDO + + ! Reset the cloud contribution if no cloud water survives to this level: + DO JL=KIDIA,KFDIA + IF (ZLFINALSUM(JL)NCLDTOP + + !--------------------------------------------------------------------- + ! Subsidence sink of cloud to the layer below + ! (Implicit - re. CFL limit on convective mass flux) + !--------------------------------------------------------------------- + + DO JL=KIDIA,KFDIA + + IF(JK 0 .AND. PLUDE(JL,JK) > ZEPSEC)& + & ZLDIFDT(JL)=RCLDIFF_CONVI*ZLDIFDT(JL) + ENDDO + + ! At the moment, works on mixed RH profile and partitioned ice/liq fraction + ! so that it is similar to previous scheme + ! Should apply RHw for liquid cloud and RHi for ice cloud separately + DO JL=KIDIA,KFDIA + IF(ZLI(JL,JK) > ZEPSEC) THEN + ! Calculate environmental humidity +! ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& +! & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) +! ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB) + ZLEROS=ZA(JL,JK)*ZE + ZLEROS=MIN(ZLEROS,ZEVAPLIMMIX(JL)) + ZLEROS=MIN(ZLEROS,ZLI(JL,JK)) + ZAEROS=ZLEROS/ZLICLD(JL) !if linear term + + ! Erosion is -ve LINEAR in L,A + ZSOLAC(JL)=ZSOLAC(JL)-ZAEROS !linear + + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)+ZLIQFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)-ZLIQFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)+ZICEFRAC(JL,JK)*ZLEROS + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)-ZICEFRAC(JL,JK)*ZLEROS + + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + !---------------------------------------------------------------------- + ! calculate dqs/dt + ! Note: For the separate prognostic Qi and Ql, one would ideally use + ! Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + ! forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + ! These would then instantaneous freeze if T<-38C or lead to ice growth + ! by deposition in warmer mixed phase clouds. However, since we do + ! not have a separate prognostic equation for in-cloud humidity or a + ! statistical scheme approach in place, the depositional growth of ice + ! in the mixed phase can not be modelled and we resort to supersaturation + ! wrt ice instanteously converting to ice over one timestep + ! (see Tompkins et al. QJRMS 2007 for details) + ! Thus for the initial implementation the diagnostic mixed phase is + ! retained for the moment, and the level of approximation noted. + !---------------------------------------------------------------------- + + DO JL=KIDIA,KFDIA + ZDTDP = ZRDCP*ZTP1(JL,JK)/PAP(JL,JK) + ZDPMXDT = ZDP(JL)*ZQTMST + ZMFDN = 0.0_JPRB + IF(JK < KLEV) ZMFDN=PMFU(JL,JK+1)+PMFD(JL,JK+1) + ZWTOT = PVERVEL(JL,JK)+0.5_JPRB*RG*(PMFU(JL,JK)+PMFD(JL,JK)+ZMFDN) + ZWTOT = MIN(ZDPMXDT,MAX(-ZDPMXDT,ZWTOT)) + ZZZDT = PHRSW(JL,JK)+PHRLW(JL,JK) + ZDTDIAB = MIN(ZDPMXDT*ZDTDP,MAX(-ZDPMXDT*ZDTDP,ZZZDT))& + & *PTSPHY+RALFDCP*ZLDEFR(JL) +! Note: ZLDEFR should be set to the difference between the mixed phase functions +! in the convection and cloud scheme, but this is not calculated, so is zero and +! the functions must be the same + ZDTFORC = ZDTDP*ZWTOT*PTSPHY+ZDTDIAB + ZQOLD(JL) = ZQSMIX(JL,JK) + ZTOLD(JL) = ZTP1(JL,JK) + ZTP1(JL,JK) = ZTP1(JL,JK)+ZDTFORC + ZTP1(JL,JK) = MAX(ZTP1(JL,JK),160.0_JPRB) + LLFLAG(JL) = .TRUE. + ENDDO + + ! Formerly a call to CUADJTQ(..., ICALL=5) + DO JL=KIDIA,KFDIA + ZQP = 1.0_JPRB/PAP(JL,JK) + ZQSAT = FOEEWM(ZTP1(JL,JK))*ZQP + ZQSAT = MIN(0.5_JPRB,ZQSAT) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV *ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND = (ZQSMIX(JL,JK)-ZQSAT)/(1.0_JPRB+ZQSAT*ZCOR*FOEDEM(ZTP1(JL,JK))) + ZTP1(JL,JK) = ZTP1(JL,JK)+FOELDCPM(ZTP1(JL,JK))*ZCOND + ZQSMIX(JL,JK) = ZQSMIX(JL,JK)-ZCOND + ZQSAT = FOEEWM(ZTP1(JL,JK))*ZQP + ZQSAT = MIN(0.5_JPRB,ZQSAT) + ZCOR = 1.0_JPRB/(1.0_JPRB-RETV *ZQSAT) + ZQSAT = ZQSAT*ZCOR + ZCOND1= (ZQSMIX(JL,JK)-ZQSAT)/(1.0_JPRB+ZQSAT*ZCOR*FOEDEM(ZTP1(JL,JK))) + ZTP1(JL,JK) = ZTP1(JL,JK)+FOELDCPM(ZTP1(JL,JK))*ZCOND1 + ZQSMIX(JL,JK) = ZQSMIX(JL,JK)-ZCOND1 + ENDDO + + DO JL=KIDIA,KFDIA + ZDQS(JL) = ZQSMIX(JL,JK)-ZQOLD(JL) + ZQSMIX(JL,JK) = ZQOLD(JL) + ZTP1(JL,JK) = ZTOLD(JL) + ENDDO + + !---------------------------------------------------------------------- + ! 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + ! ---------------------------------------------------------------------- + ! Erosion term is LINEAR in L + ! Changed to be uniform distribution in cloud region + + DO JL=KIDIA,KFDIA + + ! Previous function based on DELTA DISTRIBUTION in cloud: + IF (ZDQS(JL) > 0.0_JPRB) THEN +! If subsidence evaporation term is turned off, then need to use updated +! liquid and cloud here? +! ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + ZLEVAP = ZA(JL,JK)*MIN(ZDQS(JL),ZLICLD(JL)) + ZLEVAP = MIN(ZLEVAP,ZEVAPLIMMIX(JL)) + ZLEVAP = MIN(ZLEVAP,MAX(ZQSMIX(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB)) + + ! For first guess call + ZLEVAPL(JL) = ZLIQFRAC(JL,JK)*ZLEVAP + ZLEVAPI(JL) = ZICEFRAC(JL,JK)*ZLEVAP + + ZSOLQA(JL,NCLDQV,NCLDQL) = ZSOLQA(JL,NCLDQV,NCLDQL)+ZLIQFRAC(JL,JK)*ZLEVAP + ZSOLQA(JL,NCLDQL,NCLDQV) = ZSOLQA(JL,NCLDQL,NCLDQV)-ZLIQFRAC(JL,JK)*ZLEVAP + + ZSOLQA(JL,NCLDQV,NCLDQI) = ZSOLQA(JL,NCLDQV,NCLDQI)+ZICEFRAC(JL,JK)*ZLEVAP + ZSOLQA(JL,NCLDQI,NCLDQV) = ZSOLQA(JL,NCLDQI,NCLDQV)-ZICEFRAC(JL,JK)*ZLEVAP + + ENDIF + + ENDDO + + !---------------------------------------------------------------------- + ! 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + !---------------------------------------------------------------------- + ! (1) Increase of cloud water in existing clouds + DO JL=KIDIA,KFDIA + IF(ZA(JL,JK) > ZEPSEC.AND.ZDQS(JL) <= -RLMIN) THEN + + ZLCOND1(JL)=MAX(-ZDQS(JL),0.0_JPRB) !new limiter + +!old limiter (significantly improves upper tropospheric humidity rms) + IF(ZA(JL,JK) > 0.99_JPRB) THEN + ZCOR=1.0_JPRB/(1.0_JPRB-RETV*ZQSMIX(JL,JK)) + ZCDMAX=(ZQX(JL,JK,NCLDQV)-ZQSMIX(JL,JK))/& + & (1.0_JPRB+ZCOR*ZQSMIX(JL,JK)*FOEDEM(ZTP1(JL,JK))) + ELSE + ZCDMAX=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/ZA(JL,JK) + ENDIF + ZLCOND1(JL)=MAX(MIN(ZLCOND1(JL),ZCDMAX),0.0_JPRB) +! end old limiter + + ZLCOND1(JL)=ZA(JL,JK)*ZLCOND1(JL) + IF(ZLCOND1(JL) < RLMIN) ZLCOND1(JL)=0.0_JPRB + + !------------------------------------------------------------------------- + ! All increase goes into liquid unless so cold cloud homogeneously freezes + ! Include new liquid formation in first guess value, otherwise liquid + ! remains at cold temperatures until next timestep. + !------------------------------------------------------------------------- + IF (ZTP1(JL,JK)>RTHOMO) THEN + ZSOLQA(JL,NCLDQL,NCLDQV)=ZSOLQA(JL,NCLDQL,NCLDQV)+ZLCOND1(JL) + ZSOLQA(JL,NCLDQV,NCLDQL)=ZSOLQA(JL,NCLDQV,NCLDQL)-ZLCOND1(JL) + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZLCOND1(JL) + ELSE + ZSOLQA(JL,NCLDQI,NCLDQV)=ZSOLQA(JL,NCLDQI,NCLDQV)+ZLCOND1(JL) + ZSOLQA(JL,NCLDQV,NCLDQI)=ZSOLQA(JL,NCLDQV,NCLDQI)-ZLCOND1(JL) + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZLCOND1(JL) + ENDIF + ENDIF + ENDDO + + ! (2) Generation of new clouds (da/dt>0) + + DO JL=KIDIA,KFDIA + + IF(ZDQS(JL) <= -RLMIN .AND. ZA(JL,JK)<1.0_JPRB-ZEPSEC) THEN + + !--------------------------- + ! Critical relative humidity + !--------------------------- + ZRHC=RAMID + ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + ! Increase RHcrit to 1.0 towards the surface (eta>0.8) + IF(ZSIGK > 0.8_JPRB) THEN + ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + ENDIF + +! Commented out for CY37R1 to reduce humidity in high trop and strat +! ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above +! ZBOTT=ZTRPAUS(JL)+0.2_JPRB +! IF(ZSIGK < ZBOTT) THEN +! ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) +! ENDIF + + !--------------------------- + ! Supersaturation options + !--------------------------- + IF (NSSOPT==0) THEN + ! No scheme + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSICE(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZQE=MAX(0.0_JPRB,ZQE) + ELSEIF (NSSOPT==1) THEN + ! Tompkins + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSICE(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZQE=MAX(0.0_JPRB,ZQE) + ELSEIF (NSSOPT==2) THEN + ! Lohmann and Karcher + ZQE=ZQX(JL,JK,NCLDQV) + ELSEIF (NSSOPT==3) THEN + ! Gierens + ZQE=ZQX(JL,JK,NCLDQV)+ZLI(JL,JK) + ENDIF + + IF (ZTP1(JL,JK)>=RTT .OR. NSSOPT==0) THEN + ! No ice supersaturation allowed + ZFAC=1.0_JPRB + ELSE + ! Ice supersaturation + ZFAC=ZFOKOOP(JL) + ENDIF + + IF(ZQE >= ZRHC*ZQSICE(JL,JK)*ZFAC.AND.ZQERTHOMO) THEN + ZSOLQA(JL,NCLDQL,NCLDQV)=ZSOLQA(JL,NCLDQL,NCLDQV)+ZLCOND2(JL) + ZSOLQA(JL,NCLDQV,NCLDQL)=ZSOLQA(JL,NCLDQV,NCLDQL)-ZLCOND2(JL) + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)+ZLCOND2(JL) + ELSE ! homogeneous freezing + ZSOLQA(JL,NCLDQI,NCLDQV)=ZSOLQA(JL,NCLDQI,NCLDQV)+ZLCOND2(JL) + ZSOLQA(JL,NCLDQV,NCLDQI)=ZSOLQA(JL,NCLDQV,NCLDQI)-ZLCOND2(JL) + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZLCOND2(JL) + ENDIF + + ENDIF + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 3.7 Growth of ice by vapour deposition + !---------------------------------------------------------------------- + ! Following Rotstayn et al. 2001: + ! does not use the ice nuclei number from cloudaer.F90 + ! but rather a simple Meyers et al. 1992 form based on the + ! supersaturation and assuming clouds are saturated with + ! respect to liquid water (well mixed), (or Koop adjustment) + ! Growth considered as sink of liquid water if present so + ! Bergeron-Findeisen adjustment in autoconversion term no longer needed + !---------------------------------------------------------------------- + + !-------------------------------------------------------- + !- + !- Ice deposition following Rotstayn et al. (2001) + !- (monodisperse ice particle size distribution) + !- + !-------------------------------------------------------- + IF (IDEPICE == 1) THEN + + DO JL=KIDIA,KFDIA + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JL,JK-1) < RCLDTOPCF .AND. ZA(JL,JK) >= RCLDTOPCF) THEN + ZCLDTOPDIST(JL) = 0.0_JPRB + ELSE + ZCLDTOPDIST(JL) = ZCLDTOPDIST(JL) + ZDP(JL)/(ZRHO(JL)*RG) + ENDIF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JL,JK)RLMIN) THEN ! T<273K + + ZVPICE=FOEEICE(ZTP1(JL,JK))*RV/RD + ZVPLIQ=ZVPICE*ZFOKOOP(JL) + ZICENUCLEI(JL)=1000.0_JPRB*EXP(12.96_JPRB*(ZVPLIQ-ZVPICE)/ZVPLIQ-0.639_JPRB) + + !------------------------------------------------ + ! 2.4e-2 is conductivity of air + ! 8.8 = 700**1/3 = density of ice to the third + !------------------------------------------------ + ZADD=RLSTT*(RLSTT/(RV*ZTP1(JL,JK))-1.0_JPRB)/(2.4E-2_JPRB*ZTP1(JL,JK)) + ZBDD=RV*ZTP1(JL,JK)*PAP(JL,JK)/(2.21_JPRB*ZVPICE) + ZCVDS=7.8_JPRB*(ZICENUCLEI(JL)/ZRHO(JL))**0.666_JPRB*(ZVPLIQ-ZVPICE) / & + & (8.87_JPRB*(ZADD+ZBDD)*ZVPICE) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0=MAX(ZICECLD(JL), ZICENUCLEI(JL)*RICEINIT/ZRHO(JL)) + + !------------------ + ! new value of ice: + !------------------ + ZINEW=(0.666_JPRB*ZCVDS*PTSPHY+ZICE0**0.666_JPRB)**1.5_JPRB + + !--------------------------- + ! grid-mean deposition rate: + !--------------------------- + ZDEPOS=MAX(ZA(JL,JK)*(ZINEW-ZICE0),0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS=MIN(ZDEPOS,ZQXFG(JL,NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- +! ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI(JL)/15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB-ZINFACTOR)* & + & (RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH),1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(JL,NCLDQI,NCLDQL)=ZSOLQA(JL,NCLDQI,NCLDQL)+ZDEPOS + ZSOLQA(JL,NCLDQL,NCLDQI)=ZSOLQA(JL,NCLDQL,NCLDQI)-ZDEPOS + ZQXFG(JL,NCLDQI)=ZQXFG(JL,NCLDQI)+ZDEPOS + ZQXFG(JL,NCLDQL)=ZQXFG(JL,NCLDQL)-ZDEPOS + + ENDIF + ENDDO + + !-------------------------------------------------------- + !- + !- Ice deposition assuming ice PSD + !- + !-------------------------------------------------------- + ELSEIF (IDEPICE == 2) THEN + + DO JL=KIDIA,KFDIA + + !-------------------------------------------------------------- + ! Calculate distance from cloud top + ! defined by cloudy layer below a layer with cloud frac <0.01 + ! ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + !-------------------------------------------------------------- + + IF (ZA(JL,JK-1) < RCLDTOPCF .AND. ZA(JL,JK) >= RCLDTOPCF) THEN + ZCLDTOPDIST(JL) = 0.0_JPRB + ELSE + ZCLDTOPDIST(JL) = ZCLDTOPDIST(JL) + ZDP(JL)/(ZRHO(JL)*RG) + ENDIF + + !-------------------------------------------------------------- + ! only treat depositional growth if liquid present. due to fact + ! that can not model ice growth from vapour without additional + ! in-cloud water vapour variable + !-------------------------------------------------------------- + IF (ZTP1(JL,JK)RLMIN) THEN ! T<273K + + ZVPICE = FOEEICE(ZTP1(JL,JK))*RV/RD + ZVPLIQ = ZVPICE*ZFOKOOP(JL) + ZICENUCLEI(JL)=1000.0_JPRB*EXP(12.96_JPRB*(ZVPLIQ-ZVPICE)/ZVPLIQ-0.639_JPRB) + + !----------------------------------------------------- + ! RICEINIT=1.E-12_JPRB is initial mass of ice particle + !----------------------------------------------------- + ZICE0=MAX(ZICECLD(JL), ZICENUCLEI(JL)*RICEINIT/ZRHO(JL)) + + ! Particle size distribution + ZTCG = 1.0_JPRB + ZFACX1I = 1.0_JPRB + + ZAPLUSB = RCL_APB1*ZVPICE-RCL_APB2*ZVPICE*ZTP1(JL,JK)+ & + & PAP(JL,JK)*RCL_APB3*ZTP1(JL,JK)**3._JPRB + ZCORRFAC = (1.0_JPRB/ZRHO(JL))**0.5_JPRB + ZCORRFAC2 = ((ZTP1(JL,JK)/273.0_JPRB)**1.5_JPRB) & + & *(393.0_JPRB/(ZTP1(JL,JK)+120.0_JPRB)) + + ZPR02 = ZRHO(JL)*ZICE0*RCL_CONST1I/(ZTCG*ZFACX1I) + + ZTERM1 = (ZVPLIQ-ZVPICE)*ZTP1(JL,JK)**2.0_JPRB*ZVPICE*ZCORRFAC2*ZTCG* & + & RCL_CONST2I*ZFACX1I/(ZRHO(JL)*ZAPLUSB*ZVPICE) + ZTERM2 = 0.65_JPRB*RCL_CONST6I*ZPR02**RCL_CONST4I+RCL_CONST3I & + & *ZCORRFAC**0.5_JPRB*ZRHO(JL)**0.5_JPRB & + & *ZPR02**RCL_CONST5I/ZCORRFAC2**0.5_JPRB + + ZDEPOS = MAX(ZA(JL,JK)*ZTERM1*ZTERM2*PTSPHY,0.0_JPRB) + + !-------------------------------------------------------------------- + ! Limit deposition to liquid water amount + ! If liquid is all frozen, ice would use up reservoir of water + ! vapour in excess of ice saturation mixing ratio - However this + ! can not be represented without a in-cloud humidity variable. Using + ! the grid-mean humidity would imply a large artificial horizontal + ! flux from the clear sky to the cloudy area. We thus rely on the + ! supersaturation check to clean up any remaining supersaturation + !-------------------------------------------------------------------- + ZDEPOS=MIN(ZDEPOS,ZQXFG(JL,NCLDQL)) ! limit to liquid water amount + + !-------------------------------------------------------------------- + ! At top of cloud, reduce deposition rate near cloud top to account for + ! small scale turbulent processes, limited ice nucleation and ice fallout + !-------------------------------------------------------------------- + ! Change to include dependence on ice nuclei concentration + ! to increase deposition rate with decreasing temperatures + ZINFACTOR = MIN(ZICENUCLEI(JL)/15000._JPRB, 1.0_JPRB) + ZDEPOS = ZDEPOS*MIN(ZINFACTOR + (1.0_JPRB-ZINFACTOR)* & + & (RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH),1.0_JPRB) + + !-------------- + ! add to matrix + !-------------- + ZSOLQA(JL,NCLDQI,NCLDQL) = ZSOLQA(JL,NCLDQI,NCLDQL)+ZDEPOS + ZSOLQA(JL,NCLDQL,NCLDQI) = ZSOLQA(JL,NCLDQL,NCLDQI)-ZDEPOS + ZQXFG(JL,NCLDQI) = ZQXFG(JL,NCLDQI)+ZDEPOS + ZQXFG(JL,NCLDQL) = ZQXFG(JL,NCLDQL)-ZDEPOS + ENDIF + ENDDO + + ENDIF ! on IDEPICE + + !###################################################################### + ! 4 *** PRECIPITATION PROCESSES *** + !###################################################################### + + !---------------------------------- + ! revise in-cloud consensate amount + !---------------------------------- + DO JL=KIDIA,KFDIA + ZTMPA = 1.0_JPRB/MAX(ZA(JL,JK),ZEPSEC) + ZLIQCLD(JL) = ZQXFG(JL,NCLDQL)*ZTMPA + ZICECLD(JL) = ZQXFG(JL,NCLDQI)*ZTMPA + ZLICLD(JL) = ZLIQCLD(JL)+ZICECLD(JL) + ENDDO + + !---------------------------------------------------------------------- + ! 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + ! now that rain, snow, graupel species are prognostic + ! the precipitation flux can be defined directly level by level + ! There is no vertical memory required from the flux variable + !---------------------------------------------------------------------- + + DO JM = 1,NCLV + IF (LLFALL(JM) .OR. JM == NCLDQI) THEN + DO JL=KIDIA,KFDIA + !------------------------ + ! source from layer above + !------------------------ + IF (JK > NCLDTOP) THEN + ZFALLSRCE(JL,JM) = ZPFPLSX(JL,JK,JM)*ZDTGDP(JL) + ZSOLQA(JL,JM,JM) = ZSOLQA(JL,JM,JM)+ZFALLSRCE(JL,JM) + ZQXFG(JL,JM) = ZQXFG(JL,JM)+ZFALLSRCE(JL,JM) + ! use first guess precip----------V + ZQPRETOT(JL) = ZQPRETOT(JL)+ZQXFG(JL,JM) + ENDIF + !------------------------------------------------- + ! sink to next layer, constant fall speed + !------------------------------------------------- + ! if aerosol effect then override + ! note that for T>233K this is the same as above. + IF (LAERICESED .AND. JM == NCLDQI) THEN + ZRE_ICE=PRE_ICE(JL,JK) + ! The exponent value is from + ! Morrison et al. JAS 2005 Appendix + ZVQX(NCLDQI) = 0.002_JPRB*ZRE_ICE**1.0_JPRB + ENDIF + ZFALL=ZVQX(JM)*ZRHO(JL) + !------------------------------------------------- + ! modified by Heymsfield and Iaquinta JAS 2000 + !------------------------------------------------- + ! ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + ! &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + ZFALLSINK(JL,JM)=ZDTGDP(JL)*ZFALL + ! Cloud budget diagnostic stored at end as implicit + ENDDO ! jl + ENDIF ! LLFALL + ENDDO ! jm + + !--------------------------------------------------------------- + ! Precip cover overlap using MAX-RAN Overlap + ! Since precipitation is now prognostic we must + ! 1) apply an arbitrary minimum coverage (0.3) if precip>0 + ! 2) abandon the 2-flux clr/cld treatment + ! 3) Thus, since we have no memory of the clear sky precip + ! fraction, we mimic the previous method by reducing + ! ZCOVPTOT(JL), which has the memory, proportionally with + ! the precip evaporation rate, taking cloud fraction + ! into account + ! #3 above leads to much smoother vertical profiles of + ! precipitation fraction than the Klein-Jakob scheme which + ! monotonically increases precip fraction and then resets + ! it to zero in a step function once clear-sky precip reaches + ! zero. + !--------------------------------------------------------------- + DO JL=KIDIA,KFDIA + IF (ZQPRETOT(JL)>ZEPSEC) THEN + ZCOVPTOT(JL) = 1.0_JPRB - ((1.0_JPRB-ZCOVPTOT(JL))*& + & (1.0_JPRB - MAX(ZA(JL,JK),ZA(JL,JK-1)))/& + & (1.0_JPRB - MIN(ZA(JL,JK-1),1.0_JPRB-1.E-06_JPRB)) ) + ZCOVPTOT(JL) = MAX(ZCOVPTOT(JL),RCOVPMIN) + ZCOVPCLR(JL) = MAX(0.0_JPRB,ZCOVPTOT(JL)-ZA(JL,JK)) ! clear sky proportion + ZRAINCLD(JL) = ZQXFG(JL,NCLDQR)/ZCOVPTOT(JL) + ZSNOWCLD(JL) = ZQXFG(JL,NCLDQS)/ZCOVPTOT(JL) + ZCOVPMAX(JL) = MAX(ZCOVPTOT(JL),ZCOVPMAX(JL)) + ELSE + ZRAINCLD(JL) = 0.0_JPRB + ZSNOWCLD(JL) = 0.0_JPRB + ZCOVPTOT(JL) = 0.0_JPRB ! no flux - reset cover + ZCOVPCLR(JL) = 0.0_JPRB ! reset clear sky proportion + ZCOVPMAX(JL) = 0.0_JPRB ! reset max cover for ZZRH calc + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.3a AUTOCONVERSION TO SNOW + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + IF(ZTP1(JL,JK) <= RTT) THEN + !----------------------------------------------------- + ! Snow Autoconversion rate follow Lin et al. 1983 + !----------------------------------------------------- + IF (ZICECLD(JL)>ZEPSEC) THEN + + ZZCO=PTSPHY*RSNOWLIN1*EXP(RSNOWLIN2*(ZTP1(JL,JK)-RTT)) + + IF (LAERICEAUTO) THEN + ZLCRIT=PICRIT_AER(JL,JK) + ! 0.3 = N**0.333 with N=0.027 + ZZCO=ZZCO*(RNICE/PNICE(JL,JK))**0.333_JPRB + ELSE + ZLCRIT=RLCRITSNOW + ENDIF + + ZSNOWAUT(JL)=ZZCO*(1.0_JPRB-EXP(-(ZICECLD(JL)/ZLCRIT)**2)) + ZSOLQB(JL,NCLDQS,NCLDQI)=ZSOLQB(JL,NCLDQS,NCLDQI)+ZSNOWAUT(JL) + + ENDIF + ENDIF + + !---------------------------------------------------------------------- + ! 4.3b AUTOCONVERSION WARM CLOUDS + ! Collection and accretion will require separate treatment + ! but for now we keep this simple treatment + !---------------------------------------------------------------------- + + IF (ZLIQCLD(JL)>ZEPSEC) THEN + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Sundqvist (1989) + !- + !-------------------------------------------------------- + IF (IWARMRAIN == 1) THEN + + ZZCO=RKCONV*PTSPHY + + IF (LAERLIQAUTOLSP) THEN + ZLCRIT=PLCRIT_AER(JL,JK) + ! 0.3 = N**0.333 with N=125 cm-3 + ZZCO=ZZCO*(RCCN/PCCN(JL,JK))**0.333_JPRB + ELSE + ! Modify autoconversion threshold dependent on: + ! land (polluted, high CCN, smaller droplets, higher threshold) + ! sea (clean, low CCN, larger droplets, lower threshold) + IF (PLSM(JL) > 0.5_JPRB) THEN + ZLCRIT = RCLCRIT_LAND ! land + ELSE + ZLCRIT = RCLCRIT_SEA ! ocean + ENDIF + ENDIF + + !------------------------------------------------------------------ + ! Parameters for cloud collection by rain and snow. + ! Note that with new prognostic variable it is now possible + ! to REPLACE this with an explicit collection parametrization + !------------------------------------------------------------------ + ZPRECIP=(ZPFPLSX(JL,JK,NCLDQS)+ZPFPLSX(JL,JK,NCLDQR))/MAX(ZEPSEC,ZCOVPTOT(JL)) + ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB)) +! ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& +! &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + IF (LAERLIQCOLL) THEN + ! 5.0 = N**0.333 with N=125 cm-3 + ZCFPR=ZCFPR*(RCCN/PCCN(JL,JK))**0.333_JPRB + ENDIF + + ZZCO=ZZCO*ZCFPR + ZLCRIT=ZLCRIT/MAX(ZCFPR,ZEPSEC) + + IF(ZLIQCLD(JL)/ZLCRIT < 20.0_JPRB )THEN ! Security for exp for some compilers + ZRAINAUT(JL)=ZZCO*(1.0_JPRB-EXP(-(ZLIQCLD(JL)/ZLCRIT)**2)) + ELSE + ZRAINAUT(JL)=ZZCO + ENDIF + + ! rain freezes instantly + IF(ZTP1(JL,JK) <= RTT) THEN + ZSOLQB(JL,NCLDQS,NCLDQL)=ZSOLQB(JL,NCLDQS,NCLDQL)+ZRAINAUT(JL) + ELSE + ZSOLQB(JL,NCLDQR,NCLDQL)=ZSOLQB(JL,NCLDQR,NCLDQL)+ZRAINAUT(JL) + ENDIF + + !-------------------------------------------------------- + !- + !- Warm-rain process follow Khairoutdinov and Kogan (2000) + !- + !-------------------------------------------------------- + ELSEIF (IWARMRAIN == 2) THEN + + IF (PLSM(JL) > 0.5_JPRB) THEN ! land + ZCONST = RCL_KK_CLOUD_NUM_LAND + ZLCRIT = RCLCRIT_LAND + ELSE ! ocean + ZCONST = RCL_KK_CLOUD_NUM_SEA + ZLCRIT = RCLCRIT_SEA + ENDIF + + IF (ZLIQCLD(JL) > ZLCRIT) THEN + + ZRAINAUT(JL) = 1.5_JPRB*ZA(JL,JK)*PTSPHY* & + & RCL_KKAau * ZLIQCLD(JL)**RCL_KKBauq * ZCONST**RCL_KKBaun + + ZRAINAUT(JL) = MIN(ZRAINAUT(JL),ZQXFG(JL,NCLDQL)) + IF (ZRAINAUT(JL) < ZEPSEC) ZRAINAUT(JL) = 0.0_JPRB + + ZRAINACC(JL) = 2.0_JPRB*ZA(JL,JK)*PTSPHY* & + & RCL_KKAac * (ZLIQCLD(JL)*ZRAINCLD(JL))**RCL_KKBac + + ZRAINACC(JL) = MIN(ZRAINACC(JL),ZQXFG(JL,NCLDQL)) + IF (ZRAINACC(JL) < ZEPSEC) ZRAINACC(JL) = 0.0_JPRB + + ELSE + ZRAINAUT(JL) = 0.0_JPRB + ZRAINACC(JL) = 0.0_JPRB + ENDIF + + ! If temperature < 0, then autoconversion produces snow rather than rain + ! Explicit + IF(ZTP1(JL,JK) <= RTT) THEN + ZSOLQA(JL,NCLDQS,NCLDQL)=ZSOLQA(JL,NCLDQS,NCLDQL)+ZRAINAUT(JL) + ZSOLQA(JL,NCLDQS,NCLDQL)=ZSOLQA(JL,NCLDQS,NCLDQL)+ZRAINACC(JL) + ZSOLQA(JL,NCLDQL,NCLDQS)=ZSOLQA(JL,NCLDQL,NCLDQS)-ZRAINAUT(JL) + ZSOLQA(JL,NCLDQL,NCLDQS)=ZSOLQA(JL,NCLDQL,NCLDQS)-ZRAINACC(JL) + ELSE + ZSOLQA(JL,NCLDQR,NCLDQL)=ZSOLQA(JL,NCLDQR,NCLDQL)+ZRAINAUT(JL) + ZSOLQA(JL,NCLDQR,NCLDQL)=ZSOLQA(JL,NCLDQR,NCLDQL)+ZRAINACC(JL) + ZSOLQA(JL,NCLDQL,NCLDQR)=ZSOLQA(JL,NCLDQL,NCLDQR)-ZRAINAUT(JL) + ZSOLQA(JL,NCLDQL,NCLDQR)=ZSOLQA(JL,NCLDQL,NCLDQR)-ZRAINACC(JL) + ENDIF + + ENDIF ! on IWARMRAIN + + ENDIF ! on ZLIQCLD > ZEPSEC + ENDDO + + + !---------------------------------------------------------------------- + ! RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + ! only active if T<0degC and supercooled liquid water is present + ! AND if not Sundquist autoconversion (as this includes riming) + !---------------------------------------------------------------------- + IF (IWARMRAIN > 1) THEN + + DO JL=KIDIA,KFDIA + IF(ZTP1(JL,JK) <= RTT .AND. ZLIQCLD(JL)>ZEPSEC) THEN + + ! Fallspeed air density correction + ZFALLCORR = (RDENSREF/ZRHO(JL))**0.4_JPRB + + !------------------------------------------------------------------ + ! Riming of snow by cloud water - implicit in lwc + !------------------------------------------------------------------ + IF (ZSNOWCLD(JL)>ZEPSEC .AND. ZCOVPTOT(JL)>0.01_JPRB) THEN + + ! Calculate riming term + ! Factor of liq water taken out because implicit + ZSNOWRIME(JL) = 0.3_JPRB*ZCOVPTOT(JL)*PTSPHY*RCL_CONST7S*ZFALLCORR & + & *(ZRHO(JL)*ZSNOWCLD(JL)*RCL_CONST1S)**RCL_CONST8S + + ! Limit snow riming term + ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + + ZSOLQB(JL,NCLDQS,NCLDQL) = ZSOLQB(JL,NCLDQS,NCLDQL) + ZSNOWRIME(JL) + + ENDIF + + !------------------------------------------------------------------ + ! Riming of ice by cloud water - implicit in lwc + ! NOT YET ACTIVE + !------------------------------------------------------------------ +! IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN +! +! ! Calculate riming term +! ! Factor of liq water taken out because implicit +! ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & +! & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S +! +! ! Limit ice riming term +! ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) +! +! ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) +! +! ENDIF + ENDIF + ENDDO + + ENDIF ! on IWARMRAIN > 1 + + + !---------------------------------------------------------------------- + ! 4.4a MELTING OF SNOW and ICE + ! with new implicit solver this also has to treat snow or ice + ! precipitating from the level above... i.e. local ice AND flux. + ! in situ ice and snow: could arise from LS advection or warming + ! falling ice and snow: arrives by precipitation process + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + ZICETOT(JL)=ZQXFG(JL,NCLDQI)+ZQXFG(JL,NCLDQS) + ZMELTMAX(JL) = 0.0_JPRB + + ! If there are frozen hydrometeors present and dry-bulb temperature > 0degC + IF(ZICETOT(JL) > ZEPSEC .AND. ZTP1(JL,JK) > RTT) THEN + + ! Calculate subsaturation + ZSUBSAT = MAX(ZQSICE(JL,JK)-ZQX(JL,JK,NCLDQV),0.0_JPRB) + + ! Calculate difference between dry-bulb (ZTP1) and the temperature + ! at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + ! Melting only occurs if the wet-bulb temperature >0 + ! i.e. warming of ice particle due to melting > cooling + ! due to evaporation. + ZTDMTW0 = ZTP1(JL,JK)-RTT-ZSUBSAT* & + & (ZTW1+ZTW2*(PAP(JL,JK)-ZTW3)-ZTW4*(ZTP1(JL,JK)-ZTW5)) + ! Not implicit yet... + ! Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + ZCONS1 = ABS(PTSPHY*(1.0_JPRB+0.5_JPRB*ZTDMTW0)/RTAUMEL) + ZMELTMAX(JL) = MAX(ZTDMTW0*ZCONS1*ZRLDCP,0.0_JPRB) + ENDIF + ENDDO + + ! Loop over frozen hydrometeors (ice, snow) + DO JM=1,NCLV + IF (IPHASE(JM) == 2) THEN + JN = IMELT(JM) + DO JL=KIDIA,KFDIA + IF(ZMELTMAX(JL)>ZEPSEC .AND. ZICETOT(JL)>ZEPSEC) THEN + ! Apply melting in same proportion as frozen hydrometeor fractions + ZALFA = ZQXFG(JL,JM)/ZICETOT(JL) + ZMELT = MIN(ZQXFG(JL,JM),ZALFA*ZMELTMAX(JL)) + ! needed in first guess + ! This implies that zqpretot has to be recalculated below + ! since is not conserved here if ice falls and liquid doesn't + ZQXFG(JL,JM) = ZQXFG(JL,JM)-ZMELT + ZQXFG(JL,JN) = ZQXFG(JL,JN)+ZMELT + ZSOLQA(JL,JN,JM) = ZSOLQA(JL,JN,JM)+ZMELT + ZSOLQA(JL,JM,JN) = ZSOLQA(JL,JM,JN)-ZMELT + ENDIF + ENDDO + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.4b FREEZING of RAIN + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + + ! If rain present + IF (ZQX(JL,JK,NCLDQR) > ZEPSEC) THEN + + IF (ZTP1(JL,JK) <= RTT .AND. ZTP1(JL,JK-1) > RTT) THEN + ! Base of melting layer/top of refreezing layer so + ! store rain/snow fraction for precip type diagnosis + ! If mostly rain, then supercooled rain slow to freeze + ! otherwise faster to freeze (snow or ice pellets) + ZQPRETOT(JL) = MAX(ZQX(JL,JK,NCLDQS)+ZQX(JL,JK,NCLDQR),ZEPSEC) + PRAINFRAC_TOPRFZ(JL) = ZQX(JL,JK,NCLDQR)/ZQPRETOT(JL) + IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN + LLRAINLIQ(JL) = .True. + ELSE + LLRAINLIQ(JL) = .False. + ENDIF + ENDIF + + ! If temperature less than zero + IF (ZTP1(JL,JK) < RTT) THEN + + IF (PRAINFRAC_TOPRFZ(JL) > 0.8) THEN + + ! Majority of raindrops completely melted + ! Refreezing is by slow heterogeneous freezing + + ! Slope of rain particle size distribution + ZLAMBDA = (RCL_FAC1/(ZRHO(JL)*ZQX(JL,JK,NCLDQR)))**RCL_FAC2 + + ! Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ZTEMP = RCL_FZRAB * (ZTP1(JL,JK)-RTT) + ZFRZ = PTSPHY * (RCL_CONST5R/ZRHO(JL)) * (EXP(ZTEMP)-1._JPRB) & + & * ZLAMBDA**RCL_CONST6R + ZFRZMAX(JL) = MAX(ZFRZ,0.0_JPRB) + + ELSE + + ! Majority of raindrops only partially melted + ! Refreeze with a shorter timescale (reverse of melting...for now) + + ZCONS1 = ABS(PTSPHY*(1.0_JPRB+0.5_JPRB*(RTT-ZTP1(JL,JK)))/RTAUMEL) + ZFRZMAX(JL) = MAX((RTT-ZTP1(JL,JK))*ZCONS1*ZRLDCP,0.0_JPRB) + + ENDIF + + IF(ZFRZMAX(JL)>ZEPSEC) THEN + ZFRZ = MIN(ZQX(JL,JK,NCLDQR),ZFRZMAX(JL)) + ZSOLQA(JL,NCLDQS,NCLDQR) = ZSOLQA(JL,NCLDQS,NCLDQR)+ZFRZ + ZSOLQA(JL,NCLDQR,NCLDQS) = ZSOLQA(JL,NCLDQR,NCLDQS)-ZFRZ + ENDIF + ENDIF + + ENDIF + + ENDDO + + !---------------------------------------------------------------------- + ! 4.4c FREEZING of LIQUID + !---------------------------------------------------------------------- + DO JL=KIDIA,KFDIA + ! not implicit yet... + ZFRZMAX(JL)=MAX((RTHOMO-ZTP1(JL,JK))*ZRLDCP,0.0_JPRB) + ENDDO + + JM = NCLDQL + JN = IMELT(JM) + DO JL=KIDIA,KFDIA + IF(ZFRZMAX(JL)>ZEPSEC .AND. ZQXFG(JL,JM)>ZEPSEC) THEN + ZFRZ = MIN(ZQXFG(JL,JM),ZFRZMAX(JL)) + ZSOLQA(JL,JN,JM) = ZSOLQA(JL,JN,JM)+ZFRZ + ZSOLQA(JL,JM,JN) = ZSOLQA(JL,JM,JN)-ZFRZ + ENDIF + ENDDO + + !---------------------------------------------------------------------- + ! 4.5 EVAPORATION OF RAIN/SNOW + !---------------------------------------------------------------------- + + !---------------------------------------- + ! Rain evaporation scheme from Sundquist + !---------------------------------------- + IF (IEVAPRAIN == 1) THEN + + ! Rain + + DO JL=KIDIA,KFDIA + + ZZRH=RPRECRHMAX+(1.0_JPRB-RPRECRHMAX)*ZCOVPMAX(JL)/MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + ZZRH=MIN(MAX(ZZRH,RPRECRHMAX),1.0_JPRB) + + ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSLIQ(JL,JK))/& + & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + !--------------------------------------------- + ! humidity in moistest ZCOVPCLR part of domain + !--------------------------------------------- + ZQE=MAX(0.0_JPRB,MIN(ZQE,ZQSLIQ(JL,JK))) + LLO1=ZCOVPCLR(JL)>ZEPSEC .AND. & + & ZQXFG(JL,NCLDQR)>ZEPSEC .AND. & + & ZQE0.8) + !IF(ZSIGK > 0.8_JPRB) THEN + ! ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + !ENDIF + !ZZRH = MIN(ZRHC,ZZRH) + + ! Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + ZZRH = MIN(0.8_JPRB,ZZRH) + + ZQE=MAX(0.0_JPRB,MIN(ZQX(JL,JK,NCLDQV),ZQSLIQ(JL,JK))) + + LLO1=ZCOVPCLR(JL)>ZEPSEC .AND. & + & ZQXFG(JL,NCLDQR)>ZEPSEC .AND. & + & ZQEZEPSEC .AND. & + & ZQXFG(JL,NCLDQS)>ZEPSEC .AND. & + & ZQEZEPSEC .AND. & + & ZQX(JL,JK,NCLDQS)>ZEPSEC .AND. & + & ZQEliquid, snow->rain + + # marks falling species + llfall = np.ndarray(order="F", shape=(nclv,)) + # LLFALL=0, cloud cover must > 0 for zqx > 0 + # LLFALL=1, no cloud needed, zqx can evaporate + + + # Keep the following for possible future total water variance scheme? + #REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + #REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + #REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + #REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + #REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + zmeltmax = np.ndarray(order="F", shape=(klon,)) + zfrzmax = np.ndarray(order="F", shape=(klon,)) + zicetot = np.ndarray(order="F", shape=(klon,)) + + + #REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + + #REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + #REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + #REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + #REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + zdqsliqdt = np.ndarray(order="F", shape=(klon,)) + zdqsicedt = np.ndarray(order="F", shape=(klon,)) + zdqsmixdt = np.ndarray(order="F", shape=(klon,)) + zcorqsliq = np.ndarray(order="F", shape=(klon,)) + zcorqsice = np.ndarray(order="F", shape=(klon,)) + #REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + zcorqsmix = np.ndarray(order="F", shape=(klon,)) + zevaplimliq = np.ndarray(order="F", shape=(klon,)) + zevaplimice = np.ndarray(order="F", shape=(klon,)) + zevaplimmix = np.ndarray(order="F", shape=(klon,)) + + #------------------------------------------------------- + # SOURCE/SINK array for implicit and explicit terms + #------------------------------------------------------- + # a POSITIVE value entered into the arrays is a... + # Source of this variable + # | + # | Sink of this variable + # | | + # V V + # ZSOLQA(JL,IQa,IQb) = explicit terms + # ZSOLQB(JL,IQa,IQb) = implicit terms + # Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + # a source of NCLDQL and a sink of IQV + # put 'magic' source terms such as PLUDE from + # detrainment into explicit source/sink array diagnognal + # ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + # i.e. A positive value is a sink!????? weird... + #------------------------------------------------------- + + # e.g. microphysical pathways between ice variables. + # fall speeds of three categories + zvqx = np.ndarray(order="F", shape=(nclv,)) + + # for sedimentation source/sink terms + + # for convection detrainment source and subsidence source/sink terms + + # for supersaturation source term from previous timestep + + # Numerical fit to wet bulb temperature + ztw1 = 1329.31 + ztw2 = 0.0074615 + ztw3 = 0.85E5 + ztw4 = 40.637 + ztw5 = 275.0 + + # Subsaturation for snow melting term + # Diff between dry-bulb temperature and + # temperature when wet-bulb = 0degC + + # Variables for deposition term + # Temperature dependent function for ice PSD + # PSD correction factor + # for ice dep + # Distance from cloud top + zcldtopdist = np.ndarray(order="F", shape=(klon,)) + # No. of ice nuclei factor for deposition + + # Autoconversion/accretion/riming/evaporation + zrainacc = np.ndarray(order="F", shape=(klon,)) + zraincld = np.ndarray(order="F", shape=(klon,)) + zsnowrime = np.ndarray(order="F", shape=(klon,)) + zsnowcld = np.ndarray(order="F", shape=(klon,)) + + # Rain freezing + # True if majority of raindrops are liquid (no ice core) + llrainliq = np.ndarray(order="F", shape=(klon,)) + + #---------------------------- + # End: new microphysics + #---------------------------- + + #---------------------- + # SCM budget statistics + #---------------------- + + + zrg = np.ndarray(order="F", shape=(klon,)) + + + + psum_solqa = np.ndarray(order="F", shape=(klon,)) + + # #include "fcttre.func.h" + # #include "fccld.func.h" + #* + # ------------------------------------------------------------------ + + # This COMDECK includes the Thermodynamical functions for the cy39 + # ECMWF Physics package. + # Consistent with YOMCST Basic physics constants, assuming the + # partial pressure of water vapour is given by a first order + # Taylor expansion of Qs(T) w.r.t. to Temperature, using constants + # in YOETHF + # Two sets of functions are available. In the first set only the + # cases water or ice are distinguished by temperature. This set + # consists of the functions FOEDELTA,FOEEW,FOEDE and FOELH. + # The second set considers, besides the two cases water and ice + # also a mix of both for the temperature range YDTHF% RTICE < T < YDTHF% RTWAT. + # This set contains FOEALFA,FOEEWM,FOEDEM,FOELDCPM and FOELHM. + # FKOOP modifies the ice saturation mixing ratio for homogeneous + # nucleation. FOE_DEWM_DT provides an approximate first derivative + # of FOEEWM. + + # Depending on the consideration of mixed phases either the first + # set (e.g. surface, post-processing) or the second set + # (e.g. clouds, condensation, convection) should be used. + + # ------------------------------------------------------------------ + # ***************************************************************** + + # NO CONSIDERATION OF MIXED PHASES + + # ***************************************************************** + def foedelta(ptare): + return max(0.0, 1.0*np.sign(ptare - ydcst.rtt)) + + # FOEDELTA = 1 water + # FOEDELTA = 0 ice + + # THERMODYNAMICAL FUNCTIONS . + + # Pressure of water vapour at saturation + # INPUT : PTARE = TEMPERATURE + def foeew(ptare): + return ydthf.r2es*np.exp((ydthf.r3les*foedelta(ptare) + ydthf.r3ies*(1.0 - foedelta(ptare)))*(ptare - ydcst.rtt) / (ptare - (ydthf.r4les*foedelta(ptare) + ydthf.r4ies*(1.0 - foedelta(ptare))))) + + def foede(ptare): + return (foedelta(ptare)*ydthf.r5alvcp + (1.0 - foedelta(ptare))*ydthf.r5alscp) / (ptare - (ydthf.r4les*foedelta(ptare) + ydthf.r4ies*(1.0 - foedelta(ptare))))**2 + + def foedesu(ptare): + return (foedelta(ptare)*ydthf.r5les + (1.0 - foedelta(ptare))*ydthf.r5ies) / (ptare - (ydthf.r4les*foedelta(ptare) + ydthf.r4ies*(1.0 - foedelta(ptare))))**2 + + def foelh(ptare): + return foedelta(ptare)*ydcst.rlvtt + (1.0 - foedelta(ptare))*ydcst.rlstt + + def foeldcp(ptare): + return foedelta(ptare)*ydthf.ralvdcp + (1.0 - foedelta(ptare))*ydthf.ralsdcp + + # ***************************************************************** + + # CONSIDERATION OF MIXED PHASES + + # ***************************************************************** + + # FOEALFA is calculated to distinguish the three cases: + + # FOEALFA=1 water phase + # FOEALFA=0 ice phase + # 0 < FOEALFA < 1 mixed phase + + # INPUT : PTARE = TEMPERATURE + def foealfa(ptare): + return min(1.0, ((max(ydthf.rtice, min(ydthf.rtwat, ptare)) - ydthf.rtice)*ydthf.rtwat_rtice_r)**2) + + + # Pressure of water vapour at saturation + # INPUT : PTARE = TEMPERATURE + def foeewm(ptare): + return ydthf.r2es*(foealfa(ptare)*np.exp(ydthf.r3les*(ptare - ydcst.rtt) / (ptare - ydthf.r4les)) + (1.0 - foealfa(ptare))*np.exp(ydthf.r3ies*(ptare - ydcst.rtt) / (ptare - ydthf.r4ies))) + + def foe_dewm_dt(ptare): + return ydthf.r2es*(ydthf.r3les*foealfa(ptare)*np.exp(ydthf.r3les*(ptare - ydcst.rtt) / (ptare - ydthf.r4les))*(ydcst.rtt - ydthf.r4les) / (ptare - ydthf.r4les)**2 + ydthf.r3ies*(1.0 - foealfa(ptare))*np.exp(ydthf.r3ies*(ptare - ydcst.rtt) / (ptare - ydthf.r4ies))*(ydcst.rtt - ydthf.r4ies) / + (ptare - ydthf.r4ies)**2) + + def foedem(ptare): + return foealfa(ptare)*ydthf.r5alvcp*(1.0 / (ptare - ydthf.r4les)**2) + (1.0 - foealfa(ptare))*ydthf.r5alscp*(1.0 / (ptare - ydthf.r4ies)**2) + + def foeldcpm(ptare): + return foealfa(ptare)*ydthf.ralvdcp + (1.0 - foealfa(ptare))*ydthf.ralsdcp + + def foelhm(ptare): + return foealfa(ptare)*ydcst.rlvtt + (1.0 - foealfa(ptare))*ydcst.rlstt + + + # Temperature normalization for humidity background change of variable + # INPUT : PTARE = TEMPERATURE + def foetb(ptare): + return foealfa(ptare)*ydthf.r3les*(ydcst.rtt - ydthf.r4les)*(1.0 / (ptare - ydthf.r4les)**2) + (1.0 - foealfa(ptare))*ydthf.r3ies*(ydcst.rtt - ydthf.r4ies)*(1.0 / (ptare - ydthf.r4ies)**2) + + # ------------------------------------------------------------------ + # ***************************************************************** + + # CONSIDERATION OF DIFFERENT MIXED PHASE FOR CONV + + # ***************************************************************** + + # FOEALFCU is calculated to distinguish the three cases: + + # FOEALFCU=1 water phase + # FOEALFCU=0 ice phase + # 0 < FOEALFCU < 1 mixed phase + + # INPUT : PTARE = TEMPERATURE + def foealfcu(ptare): + return min(1.0, ((max(ydthf.rticecu, min(ydthf.rtwat, ptare)) - ydthf.rticecu)*ydthf.rtwat_rticecu_r)**2) + + + # Pressure of water vapour at saturation + # INPUT : PTARE = TEMPERATURE + def foeewmcu(ptare): + return ydthf.r2es*(foealfcu(ptare)*np.exp(ydthf.r3les*(ptare - ydcst.rtt) / (ptare - ydthf.r4les)) + (1.0 - foealfcu(ptare))*np.exp(ydthf.r3ies*(ptare - ydcst.rtt) / (ptare - ydthf.r4ies))) + + def foedemcu(ptare): + return foealfcu(ptare)*ydthf.r5alvcp*(1.0 / (ptare - ydthf.r4les)**2) + (1.0 - foealfcu(ptare))*ydthf.r5alscp*(1.0 / (ptare - ydthf.r4ies)**2) + + def foeldcpmcu(ptare): + return foealfcu(ptare)*ydthf.ralvdcp + (1.0 - foealfcu(ptare))*ydthf.ralsdcp + + def foelhmcu(ptare): + return foealfcu(ptare)*ydcst.rlvtt + (1.0 - foealfcu(ptare))*ydcst.rlstt + # ------------------------------------------------------------------ + + # Pressure of water vapour at saturation + # This one is for the WMO definition of saturation, i.e. always + # with respect to water. + # + # Duplicate to FOEELIQ and FOEEICE for separate ice variable + # FOEELIQ always respect to water + # FOEEICE always respect to ice + # (could use FOEEW and FOEEWMO, but naming convention unclear) + # FOELSON returns e wrt liquid water using D Sonntag (1994, Met. Zeit.) + # - now recommended for use with radiosonde data (WMO CIMO guide, 2014) + # unlike the FOEE functions does not include 1/( YDCST% RETV+1.0_JPRB) factor + + def foeewmo(ptare): + return ydthf.r2es*np.exp(ydthf.r3les*(ptare - ydcst.rtt) / (ptare - ydthf.r4les)) + def foeeliq(ptare): + return ydthf.r2es*np.exp(ydthf.r3les*(ptare - ydcst.rtt) / (ptare - ydthf.r4les)) + def foeeice(ptare): + return ydthf.r2es*np.exp(ydthf.r3ies*(ptare - ydcst.rtt) / (ptare - ydthf.r4ies)) + def foelson(ptare): + return np.exp(-6096.9385 / ptare + 21.2409642 - 2.711193E-2*ptare + 1.673952E-5*ptare**2 + 2.433502*log(ptare)) + + def foeles_v(ptare): + return ydthf.r3les*(ptare - ydcst.rtt) / (ptare - ydthf.r4les) + def foeies_v(ptare): + return ydthf.r3ies*(ptare - ydcst.rtt) / (ptare - ydthf.r4ies) + def foeewm_v(ptare, exp1, exp2): + return ydthf.r2es*(foealfa(ptare)*exp1 + (1.0 - foealfa(ptare))*exp2) + def foeewmcu_v(ptare, exp1, exp2): + return ydthf.r2es*(foealfcu(ptare)*exp1 + (1.0 - foealfcu(ptare))*exp2) + # (C) Copyright 1988- ECMWF. + # + # This software is licensed under the terms of the Apache Licence Version 2.0 + # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + # + # In applying this licence, ECMWF does not waive the privileges and immunities + # granted to it by virtue of its status as an intergovernmental organisation + # nor does it submit to any jurisdiction. + + #* + # ------------------------------------------------------------------ + # This COMDECK defines functions to be used in the cloud scheme + # other than the standard saturation vapour pressure + # + # FKOOP modifies the ice saturation mixing ratio for homogeneous + # nucleation + # + # note: PTARE is temperature and is definited in frttre.h + # which MUST be included before this function block + # + # ********************************************** + # KOOP formula for homogeneous nucleation of ice + # ********************************************** + # + # INPUT : PTARE = TEMPERATURE + def fokoop(ptare): + return min(ydthf.rkoop1 - ydthf.rkoop2*ptare, foeeliq(ptare) / foeeice(ptare)) + #=============================================================================== + #IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) + zfoealfa = np.ndarray(order="F", shape=(klev + 1, klon,)) + ztp1 = np.ndarray(order="F", shape=(klev, klon,)) + zlcust = np.ndarray(order="F", shape=(nclv, klon,)) + zli = np.ndarray(order="F", shape=(klev, klon,)) + za = np.ndarray(order="F", shape=(klev, klon,)) + zaorig = np.ndarray(order="F", shape=(klev, klon,)) + llindex1 = np.ndarray(order="F", shape=(nclv, klon,)) + llindex3 = np.ndarray(order="F", shape=(nclv, nclv, klon,)) + iorder = np.ndarray(order="F", shape=(nclv, klon,)) + zliqfrac = np.ndarray(order="F", shape=(klev, klon,)) + zicefrac = np.ndarray(order="F", shape=(klev, klon,)) + zqx = np.ndarray(order="F", shape=(nclv, klev, klon,)) + zqx0 = np.ndarray(order="F", shape=(nclv, klev, klon,)) + zqxn = np.ndarray(order="F", shape=(nclv, klon,)) + zqxfg = np.ndarray(order="F", shape=(nclv, klon,)) + zqxnm1 = np.ndarray(order="F", shape=(nclv, klon,)) + zfluxq = np.ndarray(order="F", shape=(nclv, klon,)) + zpfplsx = np.ndarray(order="F", shape=(nclv, klev + 1, klon,)) + zlneg = np.ndarray(order="F", shape=(nclv, klev, klon,)) + zqxn2d = np.ndarray(order="F", shape=(nclv, klev, klon,)) + zqsmix = np.ndarray(order="F", shape=(klev, klon,)) + zqsliq = np.ndarray(order="F", shape=(klev, klon,)) + zqsice = np.ndarray(order="F", shape=(klev, klon,)) + zfoeewmt = np.ndarray(order="F", shape=(klev, klon,)) + zfoeew = np.ndarray(order="F", shape=(klev, klon,)) + zfoeeliqt = np.ndarray(order="F", shape=(klev, klon,)) + zsolqa = np.ndarray(order="F", shape=(nclv, nclv, klon,)) + zsolqb = np.ndarray(order="F", shape=(nclv, nclv, klon,)) + zqlhs = np.ndarray(order="F", shape=(nclv, nclv, klon,)) + zratio = np.ndarray(order="F", shape=(nclv, klon,)) + zsinksum = np.ndarray(order="F", shape=(nclv, klon,)) + zfallsink = np.ndarray(order="F", shape=(nclv, klon,)) + zfallsrce = np.ndarray(order="F", shape=(nclv, klon,)) + zconvsrce = np.ndarray(order="F", shape=(nclv, klon,)) + zconvsink = np.ndarray(order="F", shape=(nclv, klon,)) + zpsupsatsrce = np.ndarray(order="F", shape=(nclv, klon,)) + + # YDCST, YDTHF + + + + + + #=============================================================================== + # 0.0 Beginning of timestep book-keeping + #---------------------------------------------------------------------- + + + ####################################################################### + # 0. *** SET UP CONSTANTS *** + ####################################################################### + + # ZEPSILON=100._JPRB*EPSILON(ZEPSILON) + zepsilon = 1.E-14 + + # --------------------------------------------------------------------- + # Set version of warm-rain autoconversion/accretion + # IWARMRAIN = 1 ! Sundquist + # IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + # --------------------------------------------------------------------- + iwarmrain = 2 + # --------------------------------------------------------------------- + # Set version of rain evaporation + # IEVAPRAIN = 1 ! Sundquist + # IEVAPRAIN = 2 ! Abel and Boutle (2013) + # --------------------------------------------------------------------- + ievaprain = 2 + # --------------------------------------------------------------------- + # Set version of snow evaporation + # IEVAPSNOW = 1 ! Sundquist + # IEVAPSNOW = 2 ! New + # --------------------------------------------------------------------- + ievapsnow = 1 + # --------------------------------------------------------------------- + # Set version of ice deposition + # IDEPICE = 1 ! Rotstayn (2001) + # IDEPICE = 2 ! New + # --------------------------------------------------------------------- + idepice = 1 + + # --------------------- + # Some simple constants + # --------------------- + zqtmst = 1.0 / ptsphy + zgdcp = ydcst.rg / ydcst.rcpd + zrdcp = ydcst.rd / ydcst.rcpd + zcons1a = ydcst.rcpd / (ydcst.rlmlt*ydcst.rg*yrecldp.rtaumel) + zepsec = 1.E-14 + zrg_r = 1.0 / ydcst.rg + zrldcp = 1.0 / (ydthf.ralsdcp - ydthf.ralvdcp) + + # Note: Defined in module/yoecldp.F90 + # NCLDQL=1 ! liquid cloud water + # NCLDQI=2 ! ice cloud water + # NCLDQR=3 ! rain water + # NCLDQS=4 ! snow + # NCLDQV=5 ! vapour + + # ----------------------------------------------- + # Define species phase, 0=vapour, 1=liquid, 2=ice + # ----------------------------------------------- + iphase[ncldqv - 1] = 0 + iphase[ncldql - 1] = 1 + iphase[ncldqr - 1] = 1 + iphase[ncldqi - 1] = 2 + iphase[ncldqs - 1] = 2 + + # --------------------------------------------------- + # Set up melting/freezing index, + # if an ice category melts/freezes, where does it go? + # --------------------------------------------------- + imelt[ncldqv - 1] = -99 + imelt[ncldql - 1] = ncldqi + imelt[ncldqr - 1] = ncldqs + imelt[ncldqi - 1] = ncldqr + imelt[ncldqs - 1] = ncldqr + + # ----------------------------------------------- + # INITIALIZATION OF OUTPUT TENDENCIES + # ----------------------------------------------- + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + tendency_loc_t[jk - 1, jl - 1] = 0.0 + tendency_loc_q[jk - 1, jl - 1] = 0.0 + tendency_loc_a[jk - 1, jl - 1] = 0.0 + for jm in range(1, nclv - 1 + 1): + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + tendency_loc_cld[jm - 1, jk - 1, jl - 1] = 0.0 + + #-- These were uninitialized : meaningful only when we compare error differences + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + pcovptot[jk - 1, jl - 1] = 0.0 + tendency_loc_cld[nclv - 1, jk - 1, jl - 1] = 0.0 + + # ------------------------- + # set up fall speeds in m/s + # ------------------------- + zvqx[ncldqv - 1] = 0.0 + zvqx[ncldql - 1] = 0.0 + zvqx[ncldqi - 1] = yrecldp.rvice + zvqx[ncldqr - 1] = yrecldp.rvrain + zvqx[ncldqs - 1] = yrecldp.rvsnow + llfall[:] = False + for jm in range(1, nclv + 1): + if zvqx[jm - 1] > 0.0: + llfall[jm - 1] = True + # falling species + # Set LLFALL to false for ice (but ice still sediments!) + # Need to rationalise this at some point + llfall[ncldqi - 1] = False + + + ####################################################################### + # 1. *** INITIAL VALUES FOR VARIABLES *** + ####################################################################### + + + # ---------------------- + # non CLV initialization + # ---------------------- + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + ztp1[jk - 1, jl - 1] = pt[jk - 1, jl - 1] + ptsphy*tendency_tmp_t[jk - 1, jl - 1] + zqx[ncldqv - 1, jk - 1, jl - 1] = pq[jk - 1, jl - 1] + ptsphy*tendency_tmp_q[jk - 1, jl - 1] + zqx0[ncldqv - 1, jk - 1, jl - 1] = pq[jk - 1, jl - 1] + ptsphy*tendency_tmp_q[jk - 1, jl - 1] + za[jk - 1, jl - 1] = pa[jk - 1, jl - 1] + ptsphy*tendency_tmp_a[jk - 1, jl - 1] + zaorig[jk - 1, jl - 1] = pa[jk - 1, jl - 1] + ptsphy*tendency_tmp_a[jk - 1, jl - 1] + + # ------------------------------------- + # initialization for CLV family + # ------------------------------------- + for jm in range(1, nclv - 1 + 1): + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + zqx[jm - 1, jk - 1, jl - 1] = pclv[jm - 1, jk - 1, jl - 1] + ptsphy*tendency_tmp_cld[jm - 1, jk - 1, jl - 1] + zqx0[jm - 1, jk - 1, jl - 1] = pclv[jm - 1, jk - 1, jl - 1] + ptsphy*tendency_tmp_cld[jm - 1, jk - 1, jl - 1] + + #------------- + # zero arrays + #------------- + for jm in range(1, nclv + 1): + for jk in range(1, klev + 1 + 1): + for jl in range(kidia, kfdia + 1): + zpfplsx[jm - 1, jk - 1, jl - 1] = 0.0 # precip fluxes + + for jm in range(1, nclv + 1): + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + zqxn2d[jm - 1, jk - 1, jl - 1] = 0.0 # end of timestep values in 2D + zlneg[jm - 1, jk - 1, jl - 1] = 0.0 # negative input check + + for jl in range(kidia, kfdia + 1): + prainfrac_toprfz[jl - 1] = 0.0 # rain fraction at top of refreezing layer + llrainliq[:] = True # Assume all raindrops are liquid initially + + # ---------------------------------------------------- + # Tidy up very small cloud cover or total cloud water + # ---------------------------------------------------- + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + if zqx[ncldql - 1, jk - 1, jl - 1] + zqx[ncldqi - 1, jk - 1, jl - 1] < yrecldp.rlmin or za[jk - 1, jl - 1] < yrecldp.ramin: + + # Evaporate small cloud liquid water amounts + zlneg[ncldql - 1, jk - 1, jl - 1] = zlneg[ncldql - 1, jk - 1, jl - 1] + zqx[ncldql - 1, jk - 1, jl - 1] + zqadj = zqx[ncldql - 1, jk - 1, jl - 1]*zqtmst + tendency_loc_q[jk - 1, jl - 1] = tendency_loc_q[jk - 1, jl - 1] + zqadj + tendency_loc_t[jk - 1, jl - 1] = tendency_loc_t[jk - 1, jl - 1] - ydthf.ralvdcp*zqadj + zqx[ncldqv - 1, jk - 1, jl - 1] = zqx[ncldqv - 1, jk - 1, jl - 1] + zqx[ncldql - 1, jk - 1, jl - 1] + zqx[ncldql - 1, jk - 1, jl - 1] = 0.0 + + # Evaporate small cloud ice water amounts + zlneg[ncldqi - 1, jk - 1, jl - 1] = zlneg[ncldqi - 1, jk - 1, jl - 1] + zqx[ncldqi - 1, jk - 1, jl - 1] + zqadj = zqx[ncldqi - 1, jk - 1, jl - 1]*zqtmst + tendency_loc_q[jk - 1, jl - 1] = tendency_loc_q[jk - 1, jl - 1] + zqadj + tendency_loc_t[jk - 1, jl - 1] = tendency_loc_t[jk - 1, jl - 1] - ydthf.ralsdcp*zqadj + zqx[ncldqv - 1, jk - 1, jl - 1] = zqx[ncldqv - 1, jk - 1, jl - 1] + zqx[ncldqi - 1, jk - 1, jl - 1] + zqx[ncldqi - 1, jk - 1, jl - 1] = 0.0 + + # Set cloud cover to zero + za[jk - 1, jl - 1] = 0.0 + + + # --------------------------------- + # Tidy up small CLV variables + # --------------------------------- + #DIR$ IVDEP + for jm in range(1, nclv - 1 + 1): + #DIR$ IVDEP + for jk in range(1, klev + 1): + #DIR$ IVDEP + for jl in range(kidia, kfdia + 1): + if zqx[jm - 1, jk - 1, jl - 1] < yrecldp.rlmin: + zlneg[jm - 1, jk - 1, jl - 1] = zlneg[jm - 1, jk - 1, jl - 1] + zqx[jm - 1, jk - 1, jl - 1] + zqadj = zqx[jm - 1, jk - 1, jl - 1]*zqtmst + tendency_loc_q[jk - 1, jl - 1] = tendency_loc_q[jk - 1, jl - 1] + zqadj + if iphase[jm - 1] == 1: + tendency_loc_t[jk - 1, jl - 1] = tendency_loc_t[jk - 1, jl - 1] - ydthf.ralvdcp*zqadj + if iphase[jm - 1] == 2: + tendency_loc_t[jk - 1, jl - 1] = tendency_loc_t[jk - 1, jl - 1] - ydthf.ralsdcp*zqadj + zqx[ncldqv - 1, jk - 1, jl - 1] = zqx[ncldqv - 1, jk - 1, jl - 1] + zqx[jm - 1, jk - 1, jl - 1] + zqx[jm - 1, jk - 1, jl - 1] = 0.0 + + + # ------------------------------ + # Define saturation values + # ------------------------------ + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + #---------------------------------------- + # old *diagnostic* mixed phase saturation + #---------------------------------------- + zfoealfa[jk - 1, jl - 1] = foealfa(ztp1[jk - 1, jl - 1]) + zfoeewmt[jk - 1, jl - 1] = min(foeewm(ztp1[jk - 1, jl - 1]) / pap[jk - 1, jl - 1], 0.5) + zqsmix[jk - 1, jl - 1] = zfoeewmt[jk - 1, jl - 1] + zqsmix[jk - 1, jl - 1] = zqsmix[jk - 1, jl - 1] / (1.0 - ydcst.retv*zqsmix[jk - 1, jl - 1]) + + #--------------------------------------------- + # ice saturation T<273K + # liquid water saturation for T>273K + #--------------------------------------------- + zalfa = foedelta(ztp1[jk - 1, jl - 1]) + zfoeew[jk - 1, jl - 1] = min((zalfa*foeeliq(ztp1[jk - 1, jl - 1]) + (1.0 - zalfa)*foeeice(ztp1[jk - 1, jl - 1])) / pap[jk - 1, jl - 1], 0.5) + zfoeew[jk - 1, jl - 1] = min(0.5, zfoeew[jk - 1, jl - 1]) + zqsice[jk - 1, jl - 1] = zfoeew[jk - 1, jl - 1] / (1.0 - ydcst.retv*zfoeew[jk - 1, jl - 1]) + + #---------------------------------- + # liquid water saturation + #---------------------------------- + zfoeeliqt[jk - 1, jl - 1] = min(foeeliq(ztp1[jk - 1, jl - 1]) / pap[jk - 1, jl - 1], 0.5) + zqsliq[jk - 1, jl - 1] = zfoeeliqt[jk - 1, jl - 1] + zqsliq[jk - 1, jl - 1] = zqsliq[jk - 1, jl - 1] / (1.0 - ydcst.retv*zqsliq[jk - 1, jl - 1]) + + # !---------------------------------- + # ! ice water saturation + # !---------------------------------- + # ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + # ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + # ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + + + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + + + #------------------------------------------ + # Ensure cloud fraction is between 0 and 1 + #------------------------------------------ + za[jk - 1, jl - 1] = max(0.0, min(1.0, za[jk - 1, jl - 1])) + + #------------------------------------------------------------------- + # Calculate liq/ice fractions (no longer a diagnostic relationship) + #------------------------------------------------------------------- + zli[jk - 1, jl - 1] = zqx[ncldql - 1, jk - 1, jl - 1] + zqx[ncldqi - 1, jk - 1, jl - 1] + if zli[jk - 1, jl - 1] > yrecldp.rlmin: + zliqfrac[jk - 1, jl - 1] = zqx[ncldql - 1, jk - 1, jl - 1] / zli[jk - 1, jl - 1] + zicefrac[jk - 1, jl - 1] = 1.0 - zliqfrac[jk - 1, jl - 1] + else: + zliqfrac[jk - 1, jl - 1] = 0.0 + zicefrac[jk - 1, jl - 1] = 0.0 + + + ####################################################################### + # 2. *** CONSTANTS AND PARAMETERS *** + ####################################################################### + # Calculate L in updrafts of bl-clouds + # Specify QS, P/PS for tropopause (for c2) + # And initialize variables + #------------------------------------------ + + #--------------------------------- + # Find tropopause level (ZTRPAUS) + #--------------------------------- + for jl in range(kidia, kfdia + 1): + ztrpaus[jl - 1] = 0.1 + zpaphd[jl - 1] = 1.0 / paph[klev + 1 - 1, jl - 1] + for jk in range(1, klev - 1 + 1): + for jl in range(kidia, kfdia + 1): + zsig = pap[jk - 1, jl - 1]*zpaphd[jl - 1] + if zsig > 0.1 and zsig < 0.4 and ztp1[jk - 1, jl - 1] > ztp1[jk + 1 - 1, jl - 1]: + ztrpaus[jl - 1] = zsig + + #----------------------------- + # Reset single level variables + #----------------------------- + + for jl in range(kidia, kfdia + 1): + zanewm1[jl - 1] = 0.0 + zda[jl - 1] = 0.0 + zcovpclr[jl - 1] = 0.0 + zcovpmax[jl - 1] = 0.0 + zcovptot[jl - 1] = 0.0 + zcldtopdist[jl - 1] = 0.0 + + ####################################################################### + # 3. *** PHYSICS *** + ####################################################################### + + + #---------------------------------------------------------------------- + # START OF VERTICAL LOOP + #---------------------------------------------------------------------- + + for jk in range(yrecldp.ncldtop, klev + 1): + + #---------------------------------------------------------------------- + # 3.0 INITIALIZE VARIABLES + #---------------------------------------------------------------------- + + #--------------------------------- + # First guess microphysics + #--------------------------------- + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zqxfg[jm - 1, jl - 1] = zqx[jm - 1, jk - 1, jl - 1] + + #--------------------------------- + # Set KLON arrays to zero + #--------------------------------- + + for jl in range(kidia, kfdia + 1): + zlicld[jl - 1] = 0.0 + zrainaut[jl - 1] = 0.0 # currently needed for diags + zrainacc[jl - 1] = 0.0 # currently needed for diags + zsnowaut[jl - 1] = 0.0 # needed + zldefr[jl - 1] = 0.0 + zacust[jl - 1] = 0.0 # set later when needed + zqpretot[jl - 1] = 0.0 + zlfinalsum[jl - 1] = 0.0 + + # Required for first guess call + zlcond1[jl - 1] = 0.0 + zlcond2[jl - 1] = 0.0 + zsupsat[jl - 1] = 0.0 + zlevapl[jl - 1] = 0.0 + zlevapi[jl - 1] = 0.0 + + #------------------------------------- + # solvers for cloud fraction + #------------------------------------- + zsolab[jl - 1] = 0.0 + zsolac[jl - 1] = 0.0 + + zicetot[jl - 1] = 0.0 + + #------------------------------------------ + # reset matrix so missing pathways are set + #------------------------------------------ + for jm in range(1, nclv + 1): + for jn in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zsolqb[jm - 1, jn - 1, jl - 1] = 0.0 + zsolqa[jm - 1, jn - 1, jl - 1] = 0.0 + + #---------------------------------- + # reset new microphysics variables + #---------------------------------- + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zfallsrce[jm - 1, jl - 1] = 0.0 + zfallsink[jm - 1, jl - 1] = 0.0 + zconvsrce[jm - 1, jl - 1] = 0.0 + zconvsink[jm - 1, jl - 1] = 0.0 + zpsupsatsrce[jm - 1, jl - 1] = 0.0 + zratio[jm - 1, jl - 1] = 0.0 + + for jl in range(kidia, kfdia + 1): + + #------------------------- + # derived variables needed + #------------------------- + + zdp[jl - 1] = paph[jk + 1 - 1, jl - 1] - paph[jk - 1, jl - 1] # dp + zgdp[jl - 1] = ydcst.rg / zdp[jl - 1] # g/dp + zrho[jl - 1] = pap[jk - 1, jl - 1] / (ydcst.rd*ztp1[jk - 1, jl - 1]) # p/RT air density + + zdtgdp[jl - 1] = ptsphy*zgdp[jl - 1] # dt g/dp + zrdtgdp[jl - 1] = zdp[jl - 1]*(1.0 / (ptsphy*ydcst.rg)) # 1/(dt g/dp) + + if jk > 1: + zdtgdpf[jl - 1] = ptsphy*ydcst.rg / (pap[jk - 1, jl - 1] - pap[jk - 1 - 1, jl - 1]) + + #------------------------------------ + # Calculate dqs/dT correction factor + #------------------------------------ + # Reminder: RETV=RV/RD-1 + + # liquid + zfacw = ydthf.r5les / ((ztp1[jk - 1, jl - 1] - ydthf.r4les)**2) + zcor = 1.0 / (1.0 - ydcst.retv*zfoeeliqt[jk - 1, jl - 1]) + zdqsliqdt[jl - 1] = zfacw*zcor*zqsliq[jk - 1, jl - 1] + zcorqsliq[jl - 1] = 1.0 + ydthf.ralvdcp*zdqsliqdt[jl - 1] + + # ice + zfaci = ydthf.r5ies / ((ztp1[jk - 1, jl - 1] - ydthf.r4ies)**2) + zcor = 1.0 / (1.0 - ydcst.retv*zfoeew[jk - 1, jl - 1]) + zdqsicedt[jl - 1] = zfaci*zcor*zqsice[jk - 1, jl - 1] + zcorqsice[jl - 1] = 1.0 + ydthf.ralsdcp*zdqsicedt[jl - 1] + + # diagnostic mixed + zalfaw = zfoealfa[jk - 1, jl - 1] + zalfawm[jl - 1] = zalfaw + zfac = zalfaw*zfacw + (1.0 - zalfaw)*zfaci + zcor = 1.0 / (1.0 - ydcst.retv*zfoeewmt[jk - 1, jl - 1]) + zdqsmixdt[jl - 1] = zfac*zcor*zqsmix[jk - 1, jl - 1] + zcorqsmix[jl - 1] = 1.0 + foeldcpm(ztp1[jk - 1, jl - 1])*zdqsmixdt[jl - 1] + + # evaporation/sublimation limits + zevaplimmix[jl - 1] = max((zqsmix[jk - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1]) / zcorqsmix[jl - 1], 0.0) + zevaplimliq[jl - 1] = max((zqsliq[jk - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1]) / zcorqsliq[jl - 1], 0.0) + zevaplimice[jl - 1] = max((zqsice[jk - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1]) / zcorqsice[jl - 1], 0.0) + + #-------------------------------- + # in-cloud consensate amount + #-------------------------------- + ztmpa = 1.0 / max(za[jk - 1, jl - 1], zepsec) + zliqcld[jl - 1] = zqx[ncldql - 1, jk - 1, jl - 1]*ztmpa + zicecld[jl - 1] = zqx[ncldqi - 1, jk - 1, jl - 1]*ztmpa + zlicld[jl - 1] = zliqcld[jl - 1] + zicecld[jl - 1] + + + #------------------------------------------------ + # Evaporate very small amounts of liquid and ice + #------------------------------------------------ + for jl in range(kidia, kfdia + 1): + + if zqx[ncldql - 1, jk - 1, jl - 1] < yrecldp.rlmin: + zsolqa[ncldql - 1, ncldqv - 1, jl - 1] = zqx[ncldql - 1, jk - 1, jl - 1] + zsolqa[ncldqv - 1, ncldql - 1, jl - 1] = -zqx[ncldql - 1, jk - 1, jl - 1] + + if zqx[ncldqi - 1, jk - 1, jl - 1] < yrecldp.rlmin: + zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] = zqx[ncldqi - 1, jk - 1, jl - 1] + zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] = -zqx[ncldqi - 1, jk - 1, jl - 1] + + + #--------------------------------------------------------------------- + # 3.1 ICE SUPERSATURATION ADJUSTMENT + #--------------------------------------------------------------------- + # Note that the supersaturation adjustment is made with respect to + # liquid saturation: when T>0C + # ice saturation: when T<0C + # with an adjustment made to allow for ice + # supersaturation in the clear sky + # Note also that the KOOP factor automatically clips the supersaturation + # to a maximum set by the liquid water saturation mixing ratio + # important for temperatures near to but below 0C + #----------------------------------------------------------------------- + + #DIR$ NOFUSION + for jl in range(kidia, kfdia + 1): + + #----------------------------------- + # 3.1.1 Supersaturation limit (from Koop) + #----------------------------------- + # Needs to be set for all temperatures + zfokoop[jl - 1] = fokoop(ztp1[jk - 1, jl - 1]) + for jl in range(kidia, kfdia + 1): + + if ztp1[jk - 1, jl - 1] >= ydcst.rtt or yrecldp.nssopt == 0: + zfac = 1.0 + zfaci = 1.0 + else: + zfac = za[jk - 1, jl - 1] + zfokoop[jl - 1]*(1.0 - za[jk - 1, jl - 1]) + zfaci = ptsphy / yrecldp.rkooptau + + #------------------------------------------------------------------- + # 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + # correction factor + # [#Note: QSICE or QSLIQ] + #------------------------------------------------------------------- + + # Calculate supersaturation to add to cloud + if za[jk - 1, jl - 1] > 1.0 - yrecldp.ramin: + zsupsat[jl - 1] = max((zqx[ncldqv - 1, jk - 1, jl - 1] - zfac*zqsice[jk - 1, jl - 1]) / zcorqsice[jl - 1], 0.0) + else: + # Calculate environmental humidity supersaturation + zqp1env = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsice[jk - 1, jl - 1]) / max(1.0 - za[jk - 1, jl - 1], zepsilon) + #& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat[jl - 1] = max((1.0 - za[jk - 1, jl - 1])*(zqp1env - zfac*zqsice[jk - 1, jl - 1]) / zcorqsice[jl - 1], 0.0) + + #------------------------------------------------------------------- + # Here the supersaturation is turned into liquid water + # However, if the temperature is below the threshold for homogeneous + # freezing then the supersaturation is turned instantly to ice. + #-------------------------------------------------------------------- + + if zsupsat[jl - 1] > zepsec: + + if ztp1[jk - 1, jl - 1] > yrecldp.rthomo: + # Turn supersaturation into liquid water + zsolqa[ncldqv - 1, ncldql - 1, jl - 1] = zsolqa[ncldqv - 1, ncldql - 1, jl - 1] + zsupsat[jl - 1] + zsolqa[ncldql - 1, ncldqv - 1, jl - 1] = zsolqa[ncldql - 1, ncldqv - 1, jl - 1] - zsupsat[jl - 1] + # Include liquid in first guess + zqxfg[ncldql - 1, jl - 1] = zqxfg[ncldql - 1, jl - 1] + zsupsat[jl - 1] + else: + # Turn supersaturation into ice water + zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] + zsupsat[jl - 1] + zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] - zsupsat[jl - 1] + # Add ice to first guess for deposition term + zqxfg[ncldqi - 1, jl - 1] = zqxfg[ncldqi - 1, jl - 1] + zsupsat[jl - 1] + + # Increase cloud amount using RKOOPTAU timescale + zsolac[jl - 1] = (1.0 - za[jk - 1, jl - 1])*zfaci + + + #------------------------------------------------------- + # 3.1.3 Include supersaturation from previous timestep + # (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + #------------------------------------------------------- + if psupsat[jk - 1, jl - 1] > zepsec: + if ztp1[jk - 1, jl - 1] > yrecldp.rthomo: + # Turn supersaturation into liquid water + zsolqa[ncldql - 1, ncldql - 1, jl - 1] = zsolqa[ncldql - 1, ncldql - 1, jl - 1] + psupsat[jk - 1, jl - 1] + zpsupsatsrce[ncldql - 1, jl - 1] = psupsat[jk - 1, jl - 1] + # Add liquid to first guess for deposition term + zqxfg[ncldql - 1, jl - 1] = zqxfg[ncldql - 1, jl - 1] + psupsat[jk - 1, jl - 1] + # Store cloud budget diagnostics if required + else: + # Turn supersaturation into ice water + zsolqa[ncldqi - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqi - 1, jl - 1] + psupsat[jk - 1, jl - 1] + zpsupsatsrce[ncldqi - 1, jl - 1] = psupsat[jk - 1, jl - 1] + # Add ice to first guess for deposition term + zqxfg[ncldqi - 1, jl - 1] = zqxfg[ncldqi - 1, jl - 1] + psupsat[jk - 1, jl - 1] + # Store cloud budget diagnostics if required + + # Increase cloud amount using RKOOPTAU timescale + zsolac[jl - 1] = (1.0 - za[jk - 1, jl - 1])*zfaci + # Store cloud budget diagnostics if required + + # on JL + + #--------------------------------------------------------------------- + # 3.2 DETRAINMENT FROM CONVECTION + #--------------------------------------------------------------------- + # * Diagnostic T-ice/liq split retained for convection + # Note: This link is now flexible and a future convection + # scheme can detrain explicit seperate budgets of: + # cloud water, ice, rain and snow + # * There is no (1-ZA) multiplier term on the cloud detrainment + # term, since is now written in mass-flux terms + # [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + #--------------------------------------------------------------------- + if jk < klev and jk >= yrecldp.ncldtop: + + for jl in range(kidia, kfdia + 1): + + plude[jk - 1, jl - 1] = plude[jk - 1, jl - 1]*zdtgdp[jl - 1] + + if ldcum[jl - 1] and plude[jk - 1, jl - 1] > yrecldp.rlmin and plu[jk + 1 - 1, jl - 1] > zepsec: + + zsolac[jl - 1] = zsolac[jl - 1] + plude[jk - 1, jl - 1] / plu[jk + 1 - 1, jl - 1] + # *diagnostic temperature split* + zalfaw = zfoealfa[jk - 1, jl - 1] + zconvsrce[ncldql - 1, jl - 1] = zalfaw*plude[jk - 1, jl - 1] + zconvsrce[ncldqi - 1, jl - 1] = (1.0 - zalfaw)*plude[jk - 1, jl - 1] + zsolqa[ncldql - 1, ncldql - 1, jl - 1] = zsolqa[ncldql - 1, ncldql - 1, jl - 1] + zconvsrce[ncldql - 1, jl - 1] + zsolqa[ncldqi - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqi - 1, jl - 1] + zconvsrce[ncldqi - 1, jl - 1] + + else: + + plude[jk - 1, jl - 1] = 0.0 + + # *convective snow detrainment source + if ldcum[jl - 1]: + zsolqa[ncldqs - 1, ncldqs - 1, jl - 1] = zsolqa[ncldqs - 1, ncldqs - 1, jl - 1] + psnde[jk - 1, jl - 1]*zdtgdp[jl - 1] + + + # JK yrecldp.ncldtop: + + for jl in range(kidia, kfdia + 1): + zmf[jl - 1] = max(0.0, (pmfu[jk - 1, jl - 1] + pmfd[jk - 1, jl - 1])*zdtgdp[jl - 1]) + zacust[jl - 1] = zmf[jl - 1]*zanewm1[jl - 1] + + for jm in range(1, nclv + 1): + if not llfall[jm - 1] and iphase[jm - 1] > 0: + for jl in range(kidia, kfdia + 1): + zlcust[jm - 1, jl - 1] = zmf[jl - 1]*zqxnm1[jm - 1, jl - 1] + # record total flux for enthalpy budget: + zconvsrce[jm - 1, jl - 1] = zconvsrce[jm - 1, jl - 1] + zlcust[jm - 1, jl - 1] + + # Now have to work out how much liquid evaporates at arrival point + # since there is no prognostic memory for in-cloud humidity, i.e. + # we always assume cloud is saturated. + + for jl in range(kidia, kfdia + 1): + zdtdp = zrdcp*0.5*(ztp1[jk - 1 - 1, jl - 1] + ztp1[jk - 1, jl - 1]) / paph[jk - 1, jl - 1] + zdtforc = zdtdp*(pap[jk - 1, jl - 1] - pap[jk - 1 - 1, jl - 1]) + #[#Note: Diagnostic mixed phase should be replaced below] + zdqs[jl - 1] = zanewm1[jl - 1]*zdtforc*zdqsmixdt[jl - 1] + + for jm in range(1, nclv + 1): + if not llfall[jm - 1] and iphase[jm - 1] > 0: + for jl in range(kidia, kfdia + 1): + zlfinal = max(0.0, zlcust[jm - 1, jl - 1] - zdqs[jl - 1]) #lim to zero + # no supersaturation allowed incloud ---V + zevap = min((zlcust[jm - 1, jl - 1] - zlfinal), zevaplimmix[jl - 1]) + # ZEVAP=0.0_JPRB + zlfinal = zlcust[jm - 1, jl - 1] - zevap + zlfinalsum[jl - 1] = zlfinalsum[jl - 1] + zlfinal # sum + + zsolqa[jm - 1, jm - 1, jl - 1] = zsolqa[jm - 1, jm - 1, jl - 1] + zlcust[jm - 1, jl - 1] # whole sum + zsolqa[jm - 1, ncldqv - 1, jl - 1] = zsolqa[jm - 1, ncldqv - 1, jl - 1] + zevap + zsolqa[ncldqv - 1, jm - 1, jl - 1] = zsolqa[ncldqv - 1, jm - 1, jl - 1] - zevap + + # Reset the cloud contribution if no cloud water survives to this level: + for jl in range(kidia, kfdia + 1): + if zlfinalsum[jl - 1] < zepsec: + zacust[jl - 1] = 0.0 + zsolac[jl - 1] = zsolac[jl - 1] + zacust[jl - 1] + + # on JK>NCLDTOP + + #--------------------------------------------------------------------- + # Subsidence sink of cloud to the layer below + # (Implicit - re. CFL limit on convective mass flux) + #--------------------------------------------------------------------- + + for jl in range(kidia, kfdia + 1): + + if jk < klev: + + zmfdn = max(0.0, (pmfu[jk + 1 - 1, jl - 1] + pmfd[jk + 1 - 1, jl - 1])*zdtgdp[jl - 1]) + + zsolab[jl - 1] = zsolab[jl - 1] + zmfdn + zsolqb[ncldql - 1, ncldql - 1, jl - 1] = zsolqb[ncldql - 1, ncldql - 1, jl - 1] + zmfdn + zsolqb[ncldqi - 1, ncldqi - 1, jl - 1] = zsolqb[ncldqi - 1, ncldqi - 1, jl - 1] + zmfdn + + # Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[ncldql - 1, jl - 1] = zmfdn + zconvsink[ncldqi - 1, jl - 1] = zmfdn + + + + #---------------------------------------------------------------------- + # 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + #---------------------------------------------------------------------- + # NOTE: In default tiedtke scheme this process decreases the cloud + # area but leaves the specific cloud water content + # within clouds unchanged + #---------------------------------------------------------------------- + + # ------------------------------ + # Define turbulent erosion rate + # ------------------------------ + for jl in range(kidia, kfdia + 1): + zldifdt[jl - 1] = yrecldp.rcldiff*ptsphy #original version + #Increase by factor of 5 for convective points + if ktype[jl - 1] > 0 and plude[jk - 1, jl - 1] > zepsec: + zldifdt[jl - 1] = yrecldp.rcldiff_convi*zldifdt[jl - 1] + + # At the moment, works on mixed RH profile and partitioned ice/liq fraction + # so that it is similar to previous scheme + # Should apply RHw for liquid cloud and RHi for ice cloud separately + for jl in range(kidia, kfdia + 1): + if zli[jk - 1, jl - 1] > zepsec: + # Calculate environmental humidity + # ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + # & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + # ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt[jl - 1]*max(zqsmix[jk - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1], 0.0) + zleros = za[jk - 1, jl - 1]*ze + zleros = min(zleros, zevaplimmix[jl - 1]) + zleros = min(zleros, zli[jk - 1, jl - 1]) + zaeros = zleros / zlicld[jl - 1] #if linear term + + # Erosion is -ve LINEAR in L,A + zsolac[jl - 1] = zsolac[jl - 1] - zaeros #linear + + zsolqa[ncldql - 1, ncldqv - 1, jl - 1] = zsolqa[ncldql - 1, ncldqv - 1, jl - 1] + zliqfrac[jk - 1, jl - 1]*zleros + zsolqa[ncldqv - 1, ncldql - 1, jl - 1] = zsolqa[ncldqv - 1, ncldql - 1, jl - 1] - zliqfrac[jk - 1, jl - 1]*zleros + zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] + zicefrac[jk - 1, jl - 1]*zleros + zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] - zicefrac[jk - 1, jl - 1]*zleros + + + #---------------------------------------------------------------------- + # 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + #---------------------------------------------------------------------- + # calculate dqs/dt + # Note: For the separate prognostic Qi and Ql, one would ideally use + # Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + # forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + # These would then instantaneous freeze if T<-38C or lead to ice growth + # by deposition in warmer mixed phase clouds. However, since we do + # not have a separate prognostic equation for in-cloud humidity or a + # statistical scheme approach in place, the depositional growth of ice + # in the mixed phase can not be modelled and we resort to supersaturation + # wrt ice instanteously converting to ice over one timestep + # (see Tompkins et al. QJRMS 2007 for details) + # Thus for the initial implementation the diagnostic mixed phase is + # retained for the moment, and the level of approximation noted. + #---------------------------------------------------------------------- + + for jl in range(kidia, kfdia + 1): + zdtdp = zrdcp*ztp1[jk - 1, jl - 1] / pap[jk - 1, jl - 1] + zdpmxdt = zdp[jl - 1]*zqtmst + zmfdn = 0.0 + if jk < klev: + zmfdn = pmfu[jk + 1 - 1, jl - 1] + pmfd[jk + 1 - 1, jl - 1] + zwtot = pvervel[jk - 1, jl - 1] + 0.5*ydcst.rg*(pmfu[jk - 1, jl - 1] + pmfd[jk - 1, jl - 1] + zmfdn) + zwtot = min(zdpmxdt, max(-zdpmxdt, zwtot)) + zzzdt = phrsw[jk - 1, jl - 1] + phrlw[jk - 1, jl - 1] + zdtdiab = min(zdpmxdt*zdtdp, max(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ydthf.ralfdcp*zldefr[jl - 1] + # Note: ZLDEFR should be set to the difference between the mixed phase functions + # in the convection and cloud scheme, but this is not calculated, so is zero and + # the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab + zqold[jl - 1] = zqsmix[jk - 1, jl - 1] + ztold[jl - 1] = ztp1[jk - 1, jl - 1] + ztp1[jk - 1, jl - 1] = ztp1[jk - 1, jl - 1] + zdtforc + ztp1[jk - 1, jl - 1] = max(ztp1[jk - 1, jl - 1], 160.0) + llflag[jl - 1] = True + + # Formerly a call to CUADJTQ(..., ICALL=5) + for jl in range(kidia, kfdia + 1): + zqp = 1.0 / pap[jk - 1, jl - 1] + zqsat = foeewm(ztp1[jk - 1, jl - 1])*zqp + zqsat = min(0.5, zqsat) + zcor = 1.0 / (1.0 - ydcst.retv*zqsat) + zqsat = zqsat*zcor + zcond = (zqsmix[jk - 1, jl - 1] - zqsat) / (1.0 + zqsat*zcor*foedem(ztp1[jk - 1, jl - 1])) + ztp1[jk - 1, jl - 1] = ztp1[jk - 1, jl - 1] + foeldcpm(ztp1[jk - 1, jl - 1])*zcond + zqsmix[jk - 1, jl - 1] = zqsmix[jk - 1, jl - 1] - zcond + zqsat = foeewm(ztp1[jk - 1, jl - 1])*zqp + zqsat = min(0.5, zqsat) + zcor = 1.0 / (1.0 - ydcst.retv*zqsat) + zqsat = zqsat*zcor + zcond1 = (zqsmix[jk - 1, jl - 1] - zqsat) / (1.0 + zqsat*zcor*foedem(ztp1[jk - 1, jl - 1])) + ztp1[jk - 1, jl - 1] = ztp1[jk - 1, jl - 1] + foeldcpm(ztp1[jk - 1, jl - 1])*zcond1 + zqsmix[jk - 1, jl - 1] = zqsmix[jk - 1, jl - 1] - zcond1 + + for jl in range(kidia, kfdia + 1): + zdqs[jl - 1] = zqsmix[jk - 1, jl - 1] - zqold[jl - 1] + zqsmix[jk - 1, jl - 1] = zqold[jl - 1] + ztp1[jk - 1, jl - 1] = ztold[jl - 1] + + #---------------------------------------------------------------------- + # 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + # ---------------------------------------------------------------------- + # Erosion term is LINEAR in L + # Changed to be uniform distribution in cloud region + + for jl in range(kidia, kfdia + 1): + + # Previous function based on DELTA DISTRIBUTION in cloud: + if zdqs[jl - 1] > 0.0: + # If subsidence evaporation term is turned off, then need to use updated + # liquid and cloud here? + # ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jk - 1, jl - 1]*min(zdqs[jl - 1], zlicld[jl - 1]) + zlevap = min(zlevap, zevaplimmix[jl - 1]) + zlevap = min(zlevap, max(zqsmix[jk - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1], 0.0)) + + # For first guess call + zlevapl[jl - 1] = zliqfrac[jk - 1, jl - 1]*zlevap + zlevapi[jl - 1] = zicefrac[jk - 1, jl - 1]*zlevap + + zsolqa[ncldql - 1, ncldqv - 1, jl - 1] = zsolqa[ncldql - 1, ncldqv - 1, jl - 1] + zliqfrac[jk - 1, jl - 1]*zlevap + zsolqa[ncldqv - 1, ncldql - 1, jl - 1] = zsolqa[ncldqv - 1, ncldql - 1, jl - 1] - zliqfrac[jk - 1, jl - 1]*zlevap + + zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] + zicefrac[jk - 1, jl - 1]*zlevap + zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] - zicefrac[jk - 1, jl - 1]*zlevap + + + + #---------------------------------------------------------------------- + # 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + #---------------------------------------------------------------------- + # (1) Increase of cloud water in existing clouds + for jl in range(kidia, kfdia + 1): + if za[jk - 1, jl - 1] > zepsec and zdqs[jl - 1] <= -yrecldp.rlmin: + + zlcond1[jl - 1] = max(-zdqs[jl - 1], 0.0) #new limiter + + #old limiter (significantly improves upper tropospheric humidity rms) + if za[jk - 1, jl - 1] > 0.99: + zcor = 1.0 / (1.0 - ydcst.retv*zqsmix[jk - 1, jl - 1]) + zcdmax = (zqx[ncldqv - 1, jk - 1, jl - 1] - zqsmix[jk - 1, jl - 1]) / (1.0 + zcor*zqsmix[jk - 1, jl - 1]*foedem(ztp1[jk - 1, jl - 1])) + else: + zcdmax = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsmix[jk - 1, jl - 1]) / za[jk - 1, jl - 1] + zlcond1[jl - 1] = max(min(zlcond1[jl - 1], zcdmax), 0.0) + # end old limiter + + zlcond1[jl - 1] = za[jk - 1, jl - 1]*zlcond1[jl - 1] + if zlcond1[jl - 1] < yrecldp.rlmin: + zlcond1[jl - 1] = 0.0 + + #------------------------------------------------------------------------- + # All increase goes into liquid unless so cold cloud homogeneously freezes + # Include new liquid formation in first guess value, otherwise liquid + # remains at cold temperatures until next timestep. + #------------------------------------------------------------------------- + if ztp1[jk - 1, jl - 1] > yrecldp.rthomo: + zsolqa[ncldqv - 1, ncldql - 1, jl - 1] = zsolqa[ncldqv - 1, ncldql - 1, jl - 1] + zlcond1[jl - 1] + zsolqa[ncldql - 1, ncldqv - 1, jl - 1] = zsolqa[ncldql - 1, ncldqv - 1, jl - 1] - zlcond1[jl - 1] + zqxfg[ncldql - 1, jl - 1] = zqxfg[ncldql - 1, jl - 1] + zlcond1[jl - 1] + else: + zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] + zlcond1[jl - 1] + zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] - zlcond1[jl - 1] + zqxfg[ncldqi - 1, jl - 1] = zqxfg[ncldqi - 1, jl - 1] + zlcond1[jl - 1] + + # (2) Generation of new clouds (da/dt>0) + + for jl in range(kidia, kfdia + 1): + + if zdqs[jl - 1] <= -yrecldp.rlmin and za[jk - 1, jl - 1] < 1.0 - zepsec: + + #--------------------------- + # Critical relative humidity + #--------------------------- + zrhc = yrecldp.ramid + zsigk = pap[jk - 1, jl - 1] / paph[klev + 1 - 1, jl - 1] + # Increase RHcrit to 1.0 towards the surface (eta>0.8) + if zsigk > 0.8: + zrhc = yrecldp.ramid + (1.0 - yrecldp.ramid)*((zsigk - 0.8) / 0.2)**2 + + # Commented out for CY37R1 to reduce humidity in high trop and strat + # ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + # ZBOTT=ZTRPAUS(JL)+0.2_JPRB + # IF(ZSIGK < ZBOTT) THEN + # ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + # ENDIF + + #--------------------------- + # Supersaturation options + #--------------------------- + if yrecldp.nssopt == 0: + # No scheme + zqe = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsice[jk - 1, jl - 1]) / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + zqe = max(0.0, zqe) + elif yrecldp.nssopt == 1: + # Tompkins + zqe = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsice[jk - 1, jl - 1]) / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + zqe = max(0.0, zqe) + elif yrecldp.nssopt == 2: + # Lohmann and Karcher + zqe = zqx[ncldqv - 1, jk - 1, jl - 1] + elif yrecldp.nssopt == 3: + # Gierens + zqe = zqx[ncldqv - 1, jk - 1, jl - 1] + zli[jk - 1, jl - 1] + + if ztp1[jk - 1, jl - 1] >= ydcst.rtt or yrecldp.nssopt == 0: + # No ice supersaturation allowed + zfac = 1.0 + else: + # Ice supersaturation + zfac = zfokoop[jl - 1] + + if zqe >= zrhc*zqsice[jk - 1, jl - 1]*zfac and zqe < zqsice[jk - 1, jl - 1]*zfac: + # note: not **2 on 1-a term if ZQE is used. + # Added correction term ZFAC to numerator 15/03/2010 + zacond = -(1.0 - za[jk - 1, jl - 1])*zfac*zdqs[jl - 1] / max(2.0*(zfac*zqsice[jk - 1, jl - 1] - zqe), zepsec) + + zacond = min(zacond, 1.0 - za[jk - 1, jl - 1]) #PUT THE LIMITER BACK + + # Linear term: + # Added correction term ZFAC 15/03/2010 + zlcond2[jl - 1] = -zfac*zdqs[jl - 1]*0.5*zacond #mine linear + + # new limiter formulation + zzdl = 2.0*(zfac*zqsice[jk - 1, jl - 1] - zqe) / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + # Added correction term ZFAC 15/03/2010 + if zfac*zdqs[jl - 1] < -zzdl: + # ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = (za[jk - 1, jl - 1] - 1.0)*zfac*zdqs[jl - 1] - zfac*zqsice[jk - 1, jl - 1] + zqx[ncldqv - 1, jk - 1, jl - 1] + zlcond2[jl - 1] = min(zlcond2[jl - 1], zlcondlim) + zlcond2[jl - 1] = max(zlcond2[jl - 1], 0.0) + + if zlcond2[jl - 1] < yrecldp.rlmin or (1.0 - za[jk - 1, jl - 1]) < zepsec: + zlcond2[jl - 1] = 0.0 + zacond = 0.0 + if zlcond2[jl - 1] == 0.0: + zacond = 0.0 + + # Large-scale generation is LINEAR in A and LINEAR in L + zsolac[jl - 1] = zsolac[jl - 1] + zacond #linear + + #------------------------------------------------------------------------ + # All increase goes into liquid unless so cold cloud homogeneously freezes + # Include new liquid formation in first guess value, otherwise liquid + # remains at cold temperatures until next timestep. + #------------------------------------------------------------------------ + if ztp1[jk - 1, jl - 1] > yrecldp.rthomo: + zsolqa[ncldqv - 1, ncldql - 1, jl - 1] = zsolqa[ncldqv - 1, ncldql - 1, jl - 1] + zlcond2[jl - 1] + zsolqa[ncldql - 1, ncldqv - 1, jl - 1] = zsolqa[ncldql - 1, ncldqv - 1, jl - 1] - zlcond2[jl - 1] + zqxfg[ncldql - 1, jl - 1] = zqxfg[ncldql - 1, jl - 1] + zlcond2[jl - 1] + else: + # homogeneous freezing + zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqi - 1, jl - 1] + zlcond2[jl - 1] + zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqi - 1, ncldqv - 1, jl - 1] - zlcond2[jl - 1] + zqxfg[ncldqi - 1, jl - 1] = zqxfg[ncldqi - 1, jl - 1] + zlcond2[jl - 1] + + + #---------------------------------------------------------------------- + # 3.7 Growth of ice by vapour deposition + #---------------------------------------------------------------------- + # Following Rotstayn et al. 2001: + # does not use the ice nuclei number from cloudaer.F90 + # but rather a simple Meyers et al. 1992 form based on the + # supersaturation and assuming clouds are saturated with + # respect to liquid water (well mixed), (or Koop adjustment) + # Growth considered as sink of liquid water if present so + # Bergeron-Findeisen adjustment in autoconversion term no longer needed + #---------------------------------------------------------------------- + + #-------------------------------------------------------- + #- + #- Ice deposition following Rotstayn et al. (2001) + #- (monodisperse ice particle size distribution) + #- + #-------------------------------------------------------- + if idepice == 1: + + for jl in range(kidia, kfdia + 1): + + #-------------------------------------------------------------- + # Calculate distance from cloud top + # defined by cloudy layer below a layer with cloud frac <0.01 + # ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + #-------------------------------------------------------------- + + if za[jk - 1 - 1, jl - 1] < yrecldp.rcldtopcf and za[jk - 1, jl - 1] >= yrecldp.rcldtopcf: + zcldtopdist[jl - 1] = 0.0 + else: + zcldtopdist[jl - 1] = zcldtopdist[jl - 1] + zdp[jl - 1] / (zrho[jl - 1]*ydcst.rg) + + #-------------------------------------------------------------- + # only treat depositional growth if liquid present. due to fact + # that can not model ice growth from vapour without additional + # in-cloud water vapour variable + #-------------------------------------------------------------- + if ztp1[jk - 1, jl - 1] < ydcst.rtt and zqxfg[ncldql - 1, jl - 1] > yrecldp.rlmin: + # T<273K + + zvpice = foeeice(ztp1[jk - 1, jl - 1])*ydcst.rv / ydcst.rd + zvpliq = zvpice*zfokoop[jl - 1] + zicenuclei[jl - 1] = 1000.0*np.exp(12.96*(zvpliq - zvpice) / zvpliq - 0.639) + + #------------------------------------------------ + # 2.4e-2 is conductivity of air + # 8.8 = 700**1/3 = density of ice to the third + #------------------------------------------------ + zadd = ydcst.rlstt*(ydcst.rlstt / (ydcst.rv*ztp1[jk - 1, jl - 1]) - 1.0) / (2.4E-2*ztp1[jk - 1, jl - 1]) + zbdd = ydcst.rv*ztp1[jk - 1, jl - 1]*pap[jk - 1, jl - 1] / (2.21*zvpice) + zcvds = 7.8*(zicenuclei[jl - 1] / zrho[jl - 1])**0.666*(zvpliq - zvpice) / (8.87*(zadd + zbdd)*zvpice) + + #----------------------------------------------------- + # RICEINIT=1.E-12_JPRB is initial mass of ice particle + #----------------------------------------------------- + zice0 = max(zicecld[jl - 1], zicenuclei[jl - 1]*yrecldp.riceinit / zrho[jl - 1]) + + #------------------ + # new value of ice: + #------------------ + zinew = (0.666*zcvds*ptsphy + zice0**0.666)**1.5 + + #--------------------------- + # grid-mean deposition rate: + #--------------------------- + zdepos = max(za[jk - 1, jl - 1]*(zinew - zice0), 0.0) + + #-------------------------------------------------------------------- + # Limit deposition to liquid water amount + # If liquid is all frozen, ice would use up reservoir of water + # vapour in excess of ice saturation mixing ratio - However this + # can not be represented without a in-cloud humidity variable. Using + # the grid-mean humidity would imply a large artificial horizontal + # flux from the clear sky to the cloudy area. We thus rely on the + # supersaturation check to clean up any remaining supersaturation + #-------------------------------------------------------------------- + zdepos = min(zdepos, zqxfg[ncldql - 1, jl - 1]) # limit to liquid water amount + + #-------------------------------------------------------------------- + # At top of cloud, reduce deposition rate near cloud top to account for + # small scale turbulent processes, limited ice nucleation and ice fallout + #-------------------------------------------------------------------- + # ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + # Change to include dependence on ice nuclei concentration + # to increase deposition rate with decreasing temperatures + zinfactor = min(zicenuclei[jl - 1] / 15000., 1.0) + zdepos = zdepos*min(zinfactor + (1.0 - zinfactor)*(yrecldp.rdepliqrefrate + zcldtopdist[jl - 1] / yrecldp.rdepliqrefdepth), 1.0) + + #-------------- + # add to matrix + #-------------- + zsolqa[ncldql - 1, ncldqi - 1, jl - 1] = zsolqa[ncldql - 1, ncldqi - 1, jl - 1] + zdepos + zsolqa[ncldqi - 1, ncldql - 1, jl - 1] = zsolqa[ncldqi - 1, ncldql - 1, jl - 1] - zdepos + zqxfg[ncldqi - 1, jl - 1] = zqxfg[ncldqi - 1, jl - 1] + zdepos + zqxfg[ncldql - 1, jl - 1] = zqxfg[ncldql - 1, jl - 1] - zdepos + + + #-------------------------------------------------------- + #- + #- Ice deposition assuming ice PSD + #- + #-------------------------------------------------------- + elif idepice == 2: + + for jl in range(kidia, kfdia + 1): + + #-------------------------------------------------------------- + # Calculate distance from cloud top + # defined by cloudy layer below a layer with cloud frac <0.01 + # ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + #-------------------------------------------------------------- + + if za[jk - 1 - 1, jl - 1] < yrecldp.rcldtopcf and za[jk - 1, jl - 1] >= yrecldp.rcldtopcf: + zcldtopdist[jl - 1] = 0.0 + else: + zcldtopdist[jl - 1] = zcldtopdist[jl - 1] + zdp[jl - 1] / (zrho[jl - 1]*ydcst.rg) + + #-------------------------------------------------------------- + # only treat depositional growth if liquid present. due to fact + # that can not model ice growth from vapour without additional + # in-cloud water vapour variable + #-------------------------------------------------------------- + if ztp1[jk - 1, jl - 1] < ydcst.rtt and zqxfg[ncldql - 1, jl - 1] > yrecldp.rlmin: + # T<273K + + zvpice = foeeice(ztp1[jk - 1, jl - 1])*ydcst.rv / ydcst.rd + zvpliq = zvpice*zfokoop[jl - 1] + zicenuclei[jl - 1] = 1000.0*np.exp(12.96*(zvpliq - zvpice) / zvpliq - 0.639) + + #----------------------------------------------------- + # RICEINIT=1.E-12_JPRB is initial mass of ice particle + #----------------------------------------------------- + zice0 = max(zicecld[jl - 1], zicenuclei[jl - 1]*yrecldp.riceinit / zrho[jl - 1]) + + # Particle size distribution + ztcg = 1.0 + zfacx1i = 1.0 + + zaplusb = yrecldp.rcl_apb1*zvpice - yrecldp.rcl_apb2*zvpice*ztp1[jk - 1, jl - 1] + pap[jk - 1, jl - 1]*yrecldp.rcl_apb3*ztp1[jk - 1, jl - 1]**3. + zcorrfac = (1.0 / zrho[jl - 1])**0.5 + zcorrfac2 = ((ztp1[jk - 1, jl - 1] / 273.0)**1.5)*(393.0 / (ztp1[jk - 1, jl - 1] + 120.0)) + + zpr02 = zrho[jl - 1]*zice0*yrecldp.rcl_const1i / (ztcg*zfacx1i) + + zterm1 = (zvpliq - zvpice)*ztp1[jk - 1, jl - 1]**2.0*zvpice*zcorrfac2*ztcg*yrecldp.rcl_const2i*zfacx1i / (zrho[jl - 1]*zaplusb*zvpice) + zterm2 = 0.65*yrecldp.rcl_const6i*zpr02**yrecldp.rcl_const4i + yrecldp.rcl_const3i*zcorrfac**0.5*zrho[jl - 1]**0.5*zpr02**yrecldp.rcl_const5i / zcorrfac2**0.5 + + zdepos = max(za[jk - 1, jl - 1]*zterm1*zterm2*ptsphy, 0.0) + + #-------------------------------------------------------------------- + # Limit deposition to liquid water amount + # If liquid is all frozen, ice would use up reservoir of water + # vapour in excess of ice saturation mixing ratio - However this + # can not be represented without a in-cloud humidity variable. Using + # the grid-mean humidity would imply a large artificial horizontal + # flux from the clear sky to the cloudy area. We thus rely on the + # supersaturation check to clean up any remaining supersaturation + #-------------------------------------------------------------------- + zdepos = min(zdepos, zqxfg[ncldql - 1, jl - 1]) # limit to liquid water amount + + #-------------------------------------------------------------------- + # At top of cloud, reduce deposition rate near cloud top to account for + # small scale turbulent processes, limited ice nucleation and ice fallout + #-------------------------------------------------------------------- + # Change to include dependence on ice nuclei concentration + # to increase deposition rate with decreasing temperatures + zinfactor = min(zicenuclei[jl - 1] / 15000., 1.0) + zdepos = zdepos*min(zinfactor + (1.0 - zinfactor)*(yrecldp.rdepliqrefrate + zcldtopdist[jl - 1] / yrecldp.rdepliqrefdepth), 1.0) + + #-------------- + # add to matrix + #-------------- + zsolqa[ncldql - 1, ncldqi - 1, jl - 1] = zsolqa[ncldql - 1, ncldqi - 1, jl - 1] + zdepos + zsolqa[ncldqi - 1, ncldql - 1, jl - 1] = zsolqa[ncldqi - 1, ncldql - 1, jl - 1] - zdepos + zqxfg[ncldqi - 1, jl - 1] = zqxfg[ncldqi - 1, jl - 1] + zdepos + zqxfg[ncldql - 1, jl - 1] = zqxfg[ncldql - 1, jl - 1] - zdepos + + # on IDEPICE + + ####################################################################### + # 4 *** PRECIPITATION PROCESSES *** + ####################################################################### + + #---------------------------------- + # revise in-cloud consensate amount + #---------------------------------- + for jl in range(kidia, kfdia + 1): + ztmpa = 1.0 / max(za[jk - 1, jl - 1], zepsec) + zliqcld[jl - 1] = zqxfg[ncldql - 1, jl - 1]*ztmpa + zicecld[jl - 1] = zqxfg[ncldqi - 1, jl - 1]*ztmpa + zlicld[jl - 1] = zliqcld[jl - 1] + zicecld[jl - 1] + + #---------------------------------------------------------------------- + # 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + # now that rain, snow, graupel species are prognostic + # the precipitation flux can be defined directly level by level + # There is no vertical memory required from the flux variable + #---------------------------------------------------------------------- + + for jm in range(1, nclv + 1): + if llfall[jm - 1] or jm == ncldqi: + for jl in range(kidia, kfdia + 1): + #------------------------ + # source from layer above + #------------------------ + if jk > yrecldp.ncldtop: + zfallsrce[jm - 1, jl - 1] = zpfplsx[jm - 1, jk - 1, jl - 1]*zdtgdp[jl - 1] + zsolqa[jm - 1, jm - 1, jl - 1] = zsolqa[jm - 1, jm - 1, jl - 1] + zfallsrce[jm - 1, jl - 1] + zqxfg[jm - 1, jl - 1] = zqxfg[jm - 1, jl - 1] + zfallsrce[jm - 1, jl - 1] + # use first guess precip----------V + zqpretot[jl - 1] = zqpretot[jl - 1] + zqxfg[jm - 1, jl - 1] + #------------------------------------------------- + # sink to next layer, constant fall speed + #------------------------------------------------- + # if aerosol effect then override + # note that for T>233K this is the same as above. + if yrecldp.laericesed and jm == ncldqi: + zre_ice = pre_ice[jk - 1, jl - 1] + # The exponent value is from + # Morrison et al. JAS 2005 Appendix + zvqx[ncldqi - 1] = 0.002*zre_ice**1.0 + zfall = zvqx[jm - 1]*zrho[jl - 1] + #------------------------------------------------- + # modified by Heymsfield and Iaquinta JAS 2000 + #------------------------------------------------- + # ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + # &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm - 1, jl - 1] = zdtgdp[jl - 1]*zfall + # Cloud budget diagnostic stored at end as implicit + # jl + # LLFALL + # jm + + #--------------------------------------------------------------- + # Precip cover overlap using MAX-RAN Overlap + # Since precipitation is now prognostic we must + # 1) apply an arbitrary minimum coverage (0.3) if precip>0 + # 2) abandon the 2-flux clr/cld treatment + # 3) Thus, since we have no memory of the clear sky precip + # fraction, we mimic the previous method by reducing + # ZCOVPTOT(JL), which has the memory, proportionally with + # the precip evaporation rate, taking cloud fraction + # into account + # #3 above leads to much smoother vertical profiles of + # precipitation fraction than the Klein-Jakob scheme which + # monotonically increases precip fraction and then resets + # it to zero in a step function once clear-sky precip reaches + # zero. + #--------------------------------------------------------------- + for jl in range(kidia, kfdia + 1): + if zqpretot[jl - 1] > zepsec: + zcovptot[jl - 1] = 1.0 - ((1.0 - zcovptot[jl - 1])*(1.0 - max(za[jk - 1, jl - 1], za[jk - 1 - 1, jl - 1])) / (1.0 - min(za[jk - 1 - 1, jl - 1], 1.0 - 1.E-06))) + zcovptot[jl - 1] = max(zcovptot[jl - 1], yrecldp.rcovpmin) + zcovpclr[jl - 1] = max(0.0, zcovptot[jl - 1] - za[jk - 1, jl - 1]) # clear sky proportion + zraincld[jl - 1] = zqxfg[ncldqr - 1, jl - 1] / zcovptot[jl - 1] + zsnowcld[jl - 1] = zqxfg[ncldqs - 1, jl - 1] / zcovptot[jl - 1] + zcovpmax[jl - 1] = max(zcovptot[jl - 1], zcovpmax[jl - 1]) + else: + zraincld[jl - 1] = 0.0 + zsnowcld[jl - 1] = 0.0 + zcovptot[jl - 1] = 0.0 # no flux - reset cover + zcovpclr[jl - 1] = 0.0 # reset clear sky proportion + zcovpmax[jl - 1] = 0.0 # reset max cover for ZZRH calc + + #---------------------------------------------------------------------- + # 4.3a AUTOCONVERSION TO SNOW + #---------------------------------------------------------------------- + for jl in range(kidia, kfdia + 1): + + if ztp1[jk - 1, jl - 1] <= ydcst.rtt: + #----------------------------------------------------- + # Snow Autoconversion rate follow Lin et al. 1983 + #----------------------------------------------------- + if zicecld[jl - 1] > zepsec: + + zzco = ptsphy*yrecldp.rsnowlin1*np.exp(yrecldp.rsnowlin2*(ztp1[jk - 1, jl - 1] - ydcst.rtt)) + + if yrecldp.laericeauto: + zlcrit = picrit_aer[jk - 1, jl - 1] + # 0.3 = N**0.333 with N=0.027 + zzco = zzco*(yrecldp.rnice / pnice[jk - 1, jl - 1])**0.333 + else: + zlcrit = yrecldp.rlcritsnow + + zsnowaut[jl - 1] = zzco*(1.0 - np.exp(-(zicecld[jl - 1] / zlcrit)**2)) + zsolqb[ncldqi - 1, ncldqs - 1, jl - 1] = zsolqb[ncldqi - 1, ncldqs - 1, jl - 1] + zsnowaut[jl - 1] + + + #---------------------------------------------------------------------- + # 4.3b AUTOCONVERSION WARM CLOUDS + # Collection and accretion will require separate treatment + # but for now we keep this simple treatment + #---------------------------------------------------------------------- + + if zliqcld[jl - 1] > zepsec: + + #-------------------------------------------------------- + #- + #- Warm-rain process follow Sundqvist (1989) + #- + #-------------------------------------------------------- + if iwarmrain == 1: + + zzco = yrecldp.rkconv*ptsphy + + if yrecldp.laerliqautolsp: + zlcrit = plcrit_aer[jk - 1, jl - 1] + # 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(yrecldp.rccn / pccn[jk - 1, jl - 1])**0.333 + else: + # Modify autoconversion threshold dependent on: + # land (polluted, high CCN, smaller droplets, higher threshold) + # sea (clean, low CCN, larger droplets, lower threshold) + if plsm[jl - 1] > 0.5: + zlcrit = yrecldp.rclcrit_land # land + else: + zlcrit = yrecldp.rclcrit_sea # ocean + + #------------------------------------------------------------------ + # Parameters for cloud collection by rain and snow. + # Note that with new prognostic variable it is now possible + # to REPLACE this with an explicit collection parametrization + #------------------------------------------------------------------ + zprecip = (zpfplsx[ncldqs - 1, jk - 1, jl - 1] + zpfplsx[ncldqr - 1, jk - 1, jl - 1]) / max(zepsec, zcovptot[jl - 1]) + zcfpr = 1.0 + yrecldp.rprc1*np.sqrt(max(zprecip, 0.0)) + # ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + # &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if yrecldp.laerliqcoll: + # 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(yrecldp.rccn / pccn[jk - 1, jl - 1])**0.333 + + zzco = zzco*zcfpr + zlcrit = zlcrit / max(zcfpr, zepsec) + + if zliqcld[jl - 1] / zlcrit < 20.0: + # Security for exp for some compilers + zrainaut[jl - 1] = zzco*(1.0 - np.exp(-(zliqcld[jl - 1] / zlcrit)**2)) + else: + zrainaut[jl - 1] = zzco + + # rain freezes instantly + if ztp1[jk - 1, jl - 1] <= ydcst.rtt: + zsolqb[ncldql - 1, ncldqs - 1, jl - 1] = zsolqb[ncldql - 1, ncldqs - 1, jl - 1] + zrainaut[jl - 1] + else: + zsolqb[ncldql - 1, ncldqr - 1, jl - 1] = zsolqb[ncldql - 1, ncldqr - 1, jl - 1] + zrainaut[jl - 1] + + #-------------------------------------------------------- + #- + #- Warm-rain process follow Khairoutdinov and Kogan (2000) + #- + #-------------------------------------------------------- + elif iwarmrain == 2: + + if plsm[jl - 1] > 0.5: + # land + zconst = yrecldp.rcl_kk_cloud_num_land + zlcrit = yrecldp.rclcrit_land + else: + # ocean + zconst = yrecldp.rcl_kk_cloud_num_sea + zlcrit = yrecldp.rclcrit_sea + + if zliqcld[jl - 1] > zlcrit: + + zrainaut[jl - 1] = 1.5*za[jk - 1, jl - 1]*ptsphy*yrecldp.rcl_kkaau*zliqcld[jl - 1]**yrecldp.rcl_kkbauq*zconst**yrecldp.rcl_kkbaun + + zrainaut[jl - 1] = min(zrainaut[jl - 1], zqxfg[ncldql - 1, jl - 1]) + if zrainaut[jl - 1] < zepsec: + zrainaut[jl - 1] = 0.0 + + zrainacc[jl - 1] = 2.0*za[jk - 1, jl - 1]*ptsphy*yrecldp.rcl_kkaac*(zliqcld[jl - 1]*zraincld[jl - 1])**yrecldp.rcl_kkbac + + zrainacc[jl - 1] = min(zrainacc[jl - 1], zqxfg[ncldql - 1, jl - 1]) + if zrainacc[jl - 1] < zepsec: + zrainacc[jl - 1] = 0.0 + + else: + zrainaut[jl - 1] = 0.0 + zrainacc[jl - 1] = 0.0 + + # If temperature < 0, then autoconversion produces snow rather than rain + # Explicit + if ztp1[jk - 1, jl - 1] <= ydcst.rtt: + zsolqa[ncldql - 1, ncldqs - 1, jl - 1] = zsolqa[ncldql - 1, ncldqs - 1, jl - 1] + zrainaut[jl - 1] + zsolqa[ncldql - 1, ncldqs - 1, jl - 1] = zsolqa[ncldql - 1, ncldqs - 1, jl - 1] + zrainacc[jl - 1] + zsolqa[ncldqs - 1, ncldql - 1, jl - 1] = zsolqa[ncldqs - 1, ncldql - 1, jl - 1] - zrainaut[jl - 1] + zsolqa[ncldqs - 1, ncldql - 1, jl - 1] = zsolqa[ncldqs - 1, ncldql - 1, jl - 1] - zrainacc[jl - 1] + else: + zsolqa[ncldql - 1, ncldqr - 1, jl - 1] = zsolqa[ncldql - 1, ncldqr - 1, jl - 1] + zrainaut[jl - 1] + zsolqa[ncldql - 1, ncldqr - 1, jl - 1] = zsolqa[ncldql - 1, ncldqr - 1, jl - 1] + zrainacc[jl - 1] + zsolqa[ncldqr - 1, ncldql - 1, jl - 1] = zsolqa[ncldqr - 1, ncldql - 1, jl - 1] - zrainaut[jl - 1] + zsolqa[ncldqr - 1, ncldql - 1, jl - 1] = zsolqa[ncldqr - 1, ncldql - 1, jl - 1] - zrainacc[jl - 1] + + # on IWARMRAIN + + # on ZLIQCLD > ZEPSEC + + + #---------------------------------------------------------------------- + # RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + # only active if T<0degC and supercooled liquid water is present + # AND if not Sundquist autoconversion (as this includes riming) + #---------------------------------------------------------------------- + if iwarmrain > 1: + + for jl in range(kidia, kfdia + 1): + if ztp1[jk - 1, jl - 1] <= ydcst.rtt and zliqcld[jl - 1] > zepsec: + + # Fallspeed air density correction + zfallcorr = (yrecldp.rdensref / zrho[jl - 1])**0.4 + + #------------------------------------------------------------------ + # Riming of snow by cloud water - implicit in lwc + #------------------------------------------------------------------ + if zsnowcld[jl - 1] > zepsec and zcovptot[jl - 1] > 0.01: + + # Calculate riming term + # Factor of liq water taken out because implicit + zsnowrime[jl - 1] = 0.3*zcovptot[jl - 1]*ptsphy*yrecldp.rcl_const7s*zfallcorr*(zrho[jl - 1]*zsnowcld[jl - 1]*yrecldp.rcl_const1s)**yrecldp.rcl_const8s + + # Limit snow riming term + zsnowrime[jl - 1] = min(zsnowrime[jl - 1], 1.0) + + zsolqb[ncldql - 1, ncldqs - 1, jl - 1] = zsolqb[ncldql - 1, ncldqs - 1, jl - 1] + zsnowrime[jl - 1] + + + #------------------------------------------------------------------ + # Riming of ice by cloud water - implicit in lwc + # NOT YET ACTIVE + #------------------------------------------------------------------ + # IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + # + # ! Calculate riming term + # ! Factor of liq water taken out because implicit + # ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + # & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + # + # ! Limit ice riming term + # ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + # + # ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + # + # ENDIF + + # on IWARMRAIN > 1 + + + #---------------------------------------------------------------------- + # 4.4a MELTING OF SNOW and ICE + # with new implicit solver this also has to treat snow or ice + # precipitating from the level above... i.e. local ice AND flux. + # in situ ice and snow: could arise from LS advection or warming + # falling ice and snow: arrives by precipitation process + #---------------------------------------------------------------------- + for jl in range(kidia, kfdia + 1): + + zicetot[jl - 1] = zqxfg[ncldqi - 1, jl - 1] + zqxfg[ncldqs - 1, jl - 1] + zmeltmax[jl - 1] = 0.0 + + # If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if zicetot[jl - 1] > zepsec and ztp1[jk - 1, jl - 1] > ydcst.rtt: + + # Calculate subsaturation + zsubsat = max(zqsice[jk - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1], 0.0) + + # Calculate difference between dry-bulb (ZTP1) and the temperature + # at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + # Melting only occurs if the wet-bulb temperature >0 + # i.e. warming of ice particle due to melting > cooling + # due to evaporation. + ztdmtw0 = ztp1[jk - 1, jl - 1] - ydcst.rtt - zsubsat*(ztw1 + ztw2*(pap[jk - 1, jl - 1] - ztw3) - ztw4*(ztp1[jk - 1, jl - 1] - ztw5)) + # Not implicit yet... + # Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = abs(ptsphy*(1.0 + 0.5*ztdmtw0) / yrecldp.rtaumel) + zmeltmax[jl - 1] = max(ztdmtw0*zcons1*zrldcp, 0.0) + + # Loop over frozen hydrometeors (ice, snow) + for jm in range(1, nclv + 1): + if iphase[jm - 1] == 2: + jn = imelt[jm - 1] + for jl in range(kidia, kfdia + 1): + if zmeltmax[jl - 1] > zepsec and zicetot[jl - 1] > zepsec: + # Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm - 1, jl - 1] / zicetot[jl - 1] + zmelt = min(zqxfg[jm - 1, jl - 1], zalfa*zmeltmax[jl - 1]) + # needed in first guess + # This implies that zqpretot has to be recalculated below + # since is not conserved here if ice falls and liquid doesn't + zqxfg[jm - 1, jl - 1] = zqxfg[jm - 1, jl - 1] - zmelt + zqxfg[jn - 1, jl - 1] = zqxfg[jn - 1, jl - 1] + zmelt + zsolqa[jm - 1, jn - 1, jl - 1] = zsolqa[jm - 1, jn - 1, jl - 1] + zmelt + zsolqa[jn - 1, jm - 1, jl - 1] = zsolqa[jn - 1, jm - 1, jl - 1] - zmelt + + #---------------------------------------------------------------------- + # 4.4b FREEZING of RAIN + #---------------------------------------------------------------------- + for jl in range(kidia, kfdia + 1): + + # If rain present + if zqx[ncldqr - 1, jk - 1, jl - 1] > zepsec: + + if ztp1[jk - 1, jl - 1] <= ydcst.rtt and ztp1[jk - 1 - 1, jl - 1] > ydcst.rtt: + # Base of melting layer/top of refreezing layer so + # store rain/snow fraction for precip type diagnosis + # If mostly rain, then supercooled rain slow to freeze + # otherwise faster to freeze (snow or ice pellets) + zqpretot[jl - 1] = max(zqx[ncldqs - 1, jk - 1, jl - 1] + zqx[ncldqr - 1, jk - 1, jl - 1], zepsec) + prainfrac_toprfz[jl - 1] = zqx[ncldqr - 1, jk - 1, jl - 1] / zqpretot[jl - 1] + if prainfrac_toprfz[jl - 1] > 0.8: + llrainliq[jl - 1] = True + else: + llrainliq[jl - 1] = False + + # If temperature less than zero + if ztp1[jk - 1, jl - 1] < ydcst.rtt: + + if prainfrac_toprfz[jl - 1] > 0.8: + + # Majority of raindrops completely melted + # Refreezing is by slow heterogeneous freezing + + # Slope of rain particle size distribution + zlambda = (yrecldp.rcl_fac1 / (zrho[jl - 1]*zqx[ncldqr - 1, jk - 1, jl - 1]))**yrecldp.rcl_fac2 + + # Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = yrecldp.rcl_fzrab*(ztp1[jk - 1, jl - 1] - ydcst.rtt) + zfrz = ptsphy*(yrecldp.rcl_const5r / zrho[jl - 1])*(np.exp(ztemp) - 1.)*zlambda**yrecldp.rcl_const6r + zfrzmax[jl - 1] = max(zfrz, 0.0) + + else: + + # Majority of raindrops only partially melted + # Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = abs(ptsphy*(1.0 + 0.5*(ydcst.rtt - ztp1[jk - 1, jl - 1])) / yrecldp.rtaumel) + zfrzmax[jl - 1] = max((ydcst.rtt - ztp1[jk - 1, jl - 1])*zcons1*zrldcp, 0.0) + + + if zfrzmax[jl - 1] > zepsec: + zfrz = min(zqx[ncldqr - 1, jk - 1, jl - 1], zfrzmax[jl - 1]) + zsolqa[ncldqr - 1, ncldqs - 1, jl - 1] = zsolqa[ncldqr - 1, ncldqs - 1, jl - 1] + zfrz + zsolqa[ncldqs - 1, ncldqr - 1, jl - 1] = zsolqa[ncldqs - 1, ncldqr - 1, jl - 1] - zfrz + + + + #---------------------------------------------------------------------- + # 4.4c FREEZING of LIQUID + #---------------------------------------------------------------------- + for jl in range(kidia, kfdia + 1): + # not implicit yet... + zfrzmax[jl - 1] = max((yrecldp.rthomo - ztp1[jk - 1, jl - 1])*zrldcp, 0.0) + + jm = ncldql + jn = imelt[jm - 1] + for jl in range(kidia, kfdia + 1): + if zfrzmax[jl - 1] > zepsec and zqxfg[jm - 1, jl - 1] > zepsec: + zfrz = min(zqxfg[jm - 1, jl - 1], zfrzmax[jl - 1]) + zsolqa[jm - 1, jn - 1, jl - 1] = zsolqa[jm - 1, jn - 1, jl - 1] + zfrz + zsolqa[jn - 1, jm - 1, jl - 1] = zsolqa[jn - 1, jm - 1, jl - 1] - zfrz + + #---------------------------------------------------------------------- + # 4.5 EVAPORATION OF RAIN/SNOW + #---------------------------------------------------------------------- + + #---------------------------------------- + # Rain evaporation scheme from Sundquist + #---------------------------------------- + if ievaprain == 1: + + # Rain + + for jl in range(kidia, kfdia + 1): + + zzrh = yrecldp.rprecrhmax + (1.0 - yrecldp.rprecrhmax)*zcovpmax[jl - 1] / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + zzrh = min(max(zzrh, yrecldp.rprecrhmax), 1.0) + + zqe = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsliq[jk - 1, jl - 1]) / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + #--------------------------------------------- + # humidity in moistest ZCOVPCLR part of domain + #--------------------------------------------- + zqe = max(0.0, min(zqe, zqsliq[jk - 1, jl - 1])) + llo1 = zcovpclr[jl - 1] > zepsec and zqxfg[ncldqr - 1, jl - 1] > zepsec and zqe < zzrh*zqsliq[jk - 1, jl - 1] + + if llo1: + # note: zpreclr is a rain flux + zpreclr = zqxfg[ncldqr - 1, jl - 1]*zcovpclr[jl - 1] / (max(abs(zcovptot[jl - 1]*zdtgdp[jl - 1]), zepsilon)*np.sign(zcovptot[jl - 1]*zdtgdp[jl - 1])) + + #-------------------------------------- + # actual microphysics formula in zbeta + #-------------------------------------- + + zbeta1 = np.sqrt(pap[jk - 1, jl - 1] / paph[klev + 1 - 1, jl - 1]) / yrecldp.rvrfactor*zpreclr / max(zcovpclr[jl - 1], zepsec) + + zbeta = ydcst.rg*yrecldp.rpecons*0.5*zbeta1**0.5777 + + zdenom = 1.0 + zbeta*ptsphy*zcorqsliq[jl - 1] + zdpr = zcovpclr[jl - 1]*zbeta*(zqsliq[jk - 1, jl - 1] - zqe) / zdenom*zdp[jl - 1]*zrg_r + zdpevap = zdpr*zdtgdp[jl - 1] + + #--------------------------------------------------------- + # add evaporation term to explicit sink. + # this has to be explicit since if treated in the implicit + # term evaporation can not reduce rain to zero and model + # produces small amounts of rainfall everywhere. + #--------------------------------------------------------- + + # Evaporate rain + zevap = min(zdpevap, zqxfg[ncldqr - 1, jl - 1]) + + zsolqa[ncldqr - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqr - 1, ncldqv - 1, jl - 1] + zevap + zsolqa[ncldqv - 1, ncldqr - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqr - 1, jl - 1] - zevap + + #------------------------------------------------------------- + # Reduce the total precip coverage proportional to evaporation + # to mimic the previous scheme which had a diagnostic + # 2-flux treatment, abandoned due to the new prognostic precip + #------------------------------------------------------------- + zcovptot[jl - 1] = max(yrecldp.rcovpmin, zcovptot[jl - 1] - max(0.0, (zcovptot[jl - 1] - za[jk - 1, jl - 1])*zevap / zqxfg[ncldqr - 1, jl - 1])) + + # Update fg field + zqxfg[ncldqr - 1, jl - 1] = zqxfg[ncldqr - 1, jl - 1] - zevap + + + + #--------------------------------------------------------- + # Rain evaporation scheme based on Abel and Boutle (2013) + #--------------------------------------------------------- + elif ievaprain == 2: + + for jl in range(kidia, kfdia + 1): + + #----------------------------------------------------------------------- + # Calculate relative humidity limit for rain evaporation + # to avoid cloud formation and saturation of the grid box + #----------------------------------------------------------------------- + # Limit RH for rain evaporation dependent on precipitation fraction + zzrh = yrecldp.rprecrhmax + (1.0 - yrecldp.rprecrhmax)*zcovpmax[jl - 1] / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + zzrh = min(max(zzrh, yrecldp.rprecrhmax), 1.0) + + # Critical relative humidity + #ZRHC=RAMID + #ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + # Increase RHcrit to 1.0 towards the surface (eta>0.8) + #IF(ZSIGK > 0.8_JPRB) THEN + # ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + #ENDIF + #ZZRH = MIN(ZRHC,ZZRH) + + # Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = min(0.8, zzrh) + + zqe = max(0.0, min(zqx[ncldqv - 1, jk - 1, jl - 1], zqsliq[jk - 1, jl - 1])) + + llo1 = zcovpclr[jl - 1] > zepsec and zqxfg[ncldqr - 1, jl - 1] > zepsec and zqe < zzrh*zqsliq[jk - 1, jl - 1] + + if llo1: + + #------------------------------------------- + # Abel and Boutle (2012) evaporation + #------------------------------------------- + # Calculate local precipitation (kg/kg) + zpreclr = zqxfg[ncldqr - 1, jl - 1] / zcovptot[jl - 1] + + # Fallspeed air density correction + zfallcorr = (yrecldp.rdensref / zrho[jl - 1])**0.4 + + # Saturation vapour pressure with respect to liquid phase + zesatliq = ydcst.rv / ydcst.rd*foeeliq(ztp1[jk - 1, jl - 1]) + + # Slope of particle size distribution + zlambda = (yrecldp.rcl_fac1 / (zrho[jl - 1]*zpreclr))**yrecldp.rcl_fac2 # ZPRECLR=kg/kg + + zevap_denom = yrecldp.rcl_cdenom1*zesatliq - yrecldp.rcl_cdenom2*ztp1[jk - 1, jl - 1]*zesatliq + yrecldp.rcl_cdenom3*ztp1[jk - 1, jl - 1]**3.*pap[jk - 1, jl - 1] + + # Temperature dependent conductivity + zcorr2 = (ztp1[jk - 1, jl - 1] / 273.)**1.5*393. / (ztp1[jk - 1, jl - 1] + 120.) + zka = yrecldp.rcl_ka273*zcorr2 + + zsubsat = max(zzrh*zqsliq[jk - 1, jl - 1] - zqe, 0.0) + + zbeta = (0.5 / zqsliq[jk - 1, jl - 1])*ztp1[jk - 1, jl - 1]**2.*zesatliq*yrecldp.rcl_const1r*(zcorr2 / zevap_denom)*(0.78 / (zlambda**yrecldp.rcl_const4r) + yrecldp.rcl_const2r*(zrho[jl - 1]*zfallcorr)**0.5 / (zcorr2**0.5*zlambda**yrecldp.rcl_const3r)) + + zdenom = 1.0 + zbeta*ptsphy #*ZCORQSLIQ(JL) + zdpevap = zcovpclr[jl - 1]*zbeta*ptsphy*zsubsat / zdenom + + #--------------------------------------------------------- + # Add evaporation term to explicit sink. + # this has to be explicit since if treated in the implicit + # term evaporation can not reduce rain to zero and model + # produces small amounts of rainfall everywhere. + #--------------------------------------------------------- + + # Limit rain evaporation + zevap = min(zdpevap, zqxfg[ncldqr - 1, jl - 1]) + + zsolqa[ncldqr - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqr - 1, ncldqv - 1, jl - 1] + zevap + zsolqa[ncldqv - 1, ncldqr - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqr - 1, jl - 1] - zevap + + #------------------------------------------------------------- + # Reduce the total precip coverage proportional to evaporation + # to mimic the previous scheme which had a diagnostic + # 2-flux treatment, abandoned due to the new prognostic precip + #------------------------------------------------------------- + zcovptot[jl - 1] = max(yrecldp.rcovpmin, zcovptot[jl - 1] - max(0.0, (zcovptot[jl - 1] - za[jk - 1, jl - 1])*zevap / zqxfg[ncldqr - 1, jl - 1])) + + # Update fg field + zqxfg[ncldqr - 1, jl - 1] = zqxfg[ncldqr - 1, jl - 1] - zevap + + + # on IEVAPRAIN + + #---------------------------------------------------------------------- + # 4.5 EVAPORATION OF SNOW + #---------------------------------------------------------------------- + # Snow + if ievapsnow == 1: + + for jl in range(kidia, kfdia + 1): + zzrh = yrecldp.rprecrhmax + (1.0 - yrecldp.rprecrhmax)*zcovpmax[jl - 1] / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + zzrh = min(max(zzrh, yrecldp.rprecrhmax), 1.0) + zqe = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsice[jk - 1, jl - 1]) / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + + #--------------------------------------------- + # humidity in moistest ZCOVPCLR part of domain + #--------------------------------------------- + zqe = max(0.0, min(zqe, zqsice[jk - 1, jl - 1])) + llo1 = zcovpclr[jl - 1] > zepsec and zqxfg[ncldqs - 1, jl - 1] > zepsec and zqe < zzrh*zqsice[jk - 1, jl - 1] + + if llo1: + # note: zpreclr is a rain flux a + zpreclr = zqxfg[ncldqs - 1, jl - 1]*zcovpclr[jl - 1] / (max(abs(zcovptot[jl - 1]*zdtgdp[jl - 1]), zepsilon)*np.sign(zcovptot[jl - 1]*zdtgdp[jl - 1])) + + #-------------------------------------- + # actual microphysics formula in zbeta + #-------------------------------------- + + zbeta1 = np.sqrt(pap[jk - 1, jl - 1] / paph[klev + 1 - 1, jl - 1]) / yrecldp.rvrfactor*zpreclr / max(zcovpclr[jl - 1], zepsec) + + zbeta = ydcst.rg*yrecldp.rpecons*zbeta1**0.5777 + + zdenom = 1.0 + zbeta*ptsphy*zcorqsice[jl - 1] + zdpr = zcovpclr[jl - 1]*zbeta*(zqsice[jk - 1, jl - 1] - zqe) / zdenom*zdp[jl - 1]*zrg_r + zdpevap = zdpr*zdtgdp[jl - 1] + + #--------------------------------------------------------- + # add evaporation term to explicit sink. + # this has to be explicit since if treated in the implicit + # term evaporation can not reduce snow to zero and model + # produces small amounts of snowfall everywhere. + #--------------------------------------------------------- + + # Evaporate snow + zevap = min(zdpevap, zqxfg[ncldqs - 1, jl - 1]) + + zsolqa[ncldqs - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqs - 1, ncldqv - 1, jl - 1] + zevap + zsolqa[ncldqv - 1, ncldqs - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqs - 1, jl - 1] - zevap + + #------------------------------------------------------------- + # Reduce the total precip coverage proportional to evaporation + # to mimic the previous scheme which had a diagnostic + # 2-flux treatment, abandoned due to the new prognostic precip + #------------------------------------------------------------- + zcovptot[jl - 1] = max(yrecldp.rcovpmin, zcovptot[jl - 1] - max(0.0, (zcovptot[jl - 1] - za[jk - 1, jl - 1])*zevap / zqxfg[ncldqs - 1, jl - 1])) + + #Update first guess field + zqxfg[ncldqs - 1, jl - 1] = zqxfg[ncldqs - 1, jl - 1] - zevap + + #--------------------------------------------------------- + elif ievapsnow == 2: + + + for jl in range(kidia, kfdia + 1): + + #----------------------------------------------------------------------- + # Calculate relative humidity limit for snow evaporation + #----------------------------------------------------------------------- + zzrh = yrecldp.rprecrhmax + (1.0 - yrecldp.rprecrhmax)*zcovpmax[jl - 1] / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + zzrh = min(max(zzrh, yrecldp.rprecrhmax), 1.0) + zqe = (zqx[ncldqv - 1, jk - 1, jl - 1] - za[jk - 1, jl - 1]*zqsice[jk - 1, jl - 1]) / max(zepsec, 1.0 - za[jk - 1, jl - 1]) + + #--------------------------------------------- + # humidity in moistest ZCOVPCLR part of domain + #--------------------------------------------- + zqe = max(0.0, min(zqe, zqsice[jk - 1, jl - 1])) + llo1 = zcovpclr[jl - 1] > zepsec and zqx[ncldqs - 1, jk - 1, jl - 1] > zepsec and zqe < zzrh*zqsice[jk - 1, jl - 1] + + if llo1: + + # Calculate local precipitation (kg/kg) + zpreclr = zqx[ncldqs - 1, jk - 1, jl - 1] / zcovptot[jl - 1] + zvpice = foeeice(ztp1[jk - 1, jl - 1])*ydcst.rv / ydcst.rd + + # Particle size distribution + # ZTCG increases Ni with colder temperatures - essentially a + # Fletcher or Meyers scheme? + ztcg = 1.0 #v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + # ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = 1.0 #v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = yrecldp.rcl_apb1*zvpice - yrecldp.rcl_apb2*zvpice*ztp1[jk - 1, jl - 1] + pap[jk - 1, jl - 1]*yrecldp.rcl_apb3*ztp1[jk - 1, jl - 1]**3 + zcorrfac = (1.0 / zrho[jl - 1])**0.5 + zcorrfac2 = ((ztp1[jk - 1, jl - 1] / 273.0)**1.5)*(393.0 / (ztp1[jk - 1, jl - 1] + 120.0)) + + zpr02 = zrho[jl - 1]*zpreclr*yrecldp.rcl_const1s / (ztcg*zfacx1s) + + zterm1 = (zqsice[jk - 1, jl - 1] - zqe)*ztp1[jk - 1, jl - 1]**2*zvpice*zcorrfac2*ztcg*yrecldp.rcl_const2s*zfacx1s / (zrho[jl - 1]*zaplusb*zqsice[jk - 1, jl - 1]) + zterm2 = 0.65*yrecldp.rcl_const6s*zpr02**yrecldp.rcl_const4s + yrecldp.rcl_const3s*zcorrfac**0.5*zrho[jl - 1]**0.5*zpr02**yrecldp.rcl_const5s / zcorrfac2**0.5 + + zdpevap = max(zcovpclr[jl - 1]*zterm1*zterm2*ptsphy, 0.0) + + #-------------------------------------------------------------------- + # Limit evaporation to snow amount + #-------------------------------------------------------------------- + zevap = min(zdpevap, zevaplimice[jl - 1]) + zevap = min(zevap, zqx[ncldqs - 1, jk - 1, jl - 1]) + + + zsolqa[ncldqs - 1, ncldqv - 1, jl - 1] = zsolqa[ncldqs - 1, ncldqv - 1, jl - 1] + zevap + zsolqa[ncldqv - 1, ncldqs - 1, jl - 1] = zsolqa[ncldqv - 1, ncldqs - 1, jl - 1] - zevap + + #------------------------------------------------------------- + # Reduce the total precip coverage proportional to evaporation + # to mimic the previous scheme which had a diagnostic + # 2-flux treatment, abandoned due to the new prognostic precip + #------------------------------------------------------------- + zcovptot[jl - 1] = max(yrecldp.rcovpmin, zcovptot[jl - 1] - max(0.0, (zcovptot[jl - 1] - za[jk - 1, jl - 1])*zevap / zqx[ncldqs - 1, jk - 1, jl - 1])) + + #Update first guess field + zqxfg[ncldqs - 1, jl - 1] = zqxfg[ncldqs - 1, jl - 1] - zevap + + + # on IEVAPSNOW + + #-------------------------------------- + # Evaporate small precipitation amounts + #-------------------------------------- + for jm in range(1, nclv + 1): + if llfall[jm - 1]: + for jl in range(kidia, kfdia + 1): + if zqxfg[jm - 1, jl - 1] < yrecldp.rlmin: + zsolqa[jm - 1, ncldqv - 1, jl - 1] = zsolqa[jm - 1, ncldqv - 1, jl - 1] + zqxfg[jm - 1, jl - 1] + zsolqa[ncldqv - 1, jm - 1, jl - 1] = zsolqa[ncldqv - 1, jm - 1, jl - 1] - zqxfg[jm - 1, jl - 1] + + ####################################################################### + # 5.0 *** SOLVERS FOR A AND L *** + # now use an implicit solution rather than exact solution + # solver is forward in time, upstream difference for advection + ####################################################################### + + #--------------------------- + # 5.1 solver for cloud cover + #--------------------------- + for jl in range(kidia, kfdia + 1): + zanew = (za[jk - 1, jl - 1] + zsolac[jl - 1]) / (1.0 + zsolab[jl - 1]) + zanew = min(zanew, 1.0) + if zanew < yrecldp.ramin: + zanew = 0.0 + zda[jl - 1] = zanew - zaorig[jk - 1, jl - 1] + #--------------------------------- + # variables needed for next level + #--------------------------------- + zanewm1[jl - 1] = zanew + + #-------------------------------- + # 5.2 solver for the microphysics + #-------------------------------- + + #-------------------------------------------------------------- + # Truncate explicit sinks to avoid negatives + # Note: Species are treated in the order in which they run out + # since the clipping will alter the balance for the other vars + #-------------------------------------------------------------- + + for jm in range(1, nclv + 1): + for jn in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + llindex3[jm - 1, jn - 1, jl - 1] = False + for jl in range(kidia, kfdia + 1): + zsinksum[jm - 1, jl - 1] = 0.0 + + #---------------------------- + # collect sink terms and mark + #---------------------------- + for jm in range(1, nclv + 1): + for jn in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zsinksum[jm - 1, jl - 1] = zsinksum[jm - 1, jl - 1] - zsolqa[jn - 1, jm - 1, jl - 1] # +ve total is bad + + #--------------------------------------- + # calculate overshoot and scaling factor + #--------------------------------------- + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zmax = max(zqx[jm - 1, jk - 1, jl - 1], zepsec) + zrat = max(zsinksum[jm - 1, jl - 1], zmax) + zratio[jm - 1, jl - 1] = zmax / zrat + + #-------------------------------------------- + # scale the sink terms, in the correct order, + # recalculating the scale factor each time + #-------------------------------------------- + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zsinksum[jm - 1, jl - 1] = 0.0 + + #---------------- + # recalculate sum + #---------------- + for jm in range(1, nclv + 1): + psum_solqa[:] = 0.0 + for jn in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + psum_solqa[jl - 1] = psum_solqa[jl - 1] + zsolqa[jn - 1, jm - 1, jl - 1] + for jl in range(kidia, kfdia + 1): + # ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm - 1, jl - 1] = zsinksum[jm - 1, jl - 1] - psum_solqa[jl - 1] + #--------------------------- + # recalculate scaling factor + #--------------------------- + for jl in range(kidia, kfdia + 1): + zmm = max(zqx[jm - 1, jk - 1, jl - 1], zepsec) + zrr = max(zsinksum[jm - 1, jl - 1], zmm) + zratio[jm - 1, jl - 1] = zmm / zrr + #------ + # scale + #------ + for jl in range(kidia, kfdia + 1): + zzratio = zratio[jm - 1, jl - 1] + #DIR$ IVDEP + #DIR$ PREFERVECTOR + for jn in range(1, nclv + 1): + if zsolqa[jn - 1, jm - 1, jl - 1] < 0.0: + zsolqa[jn - 1, jm - 1, jl - 1] = zsolqa[jn - 1, jm - 1, jl - 1]*zzratio + zsolqa[jm - 1, jn - 1, jl - 1] = zsolqa[jm - 1, jn - 1, jl - 1]*zzratio + + #-------------------------------------------------------------- + # 5.2.2 Solver + #------------------------ + + #------------------------ + # set the LHS of equation + #------------------------ + for jm in range(1, nclv + 1): + for jn in range(1, nclv + 1): + #---------------------------------------------- + # diagonals: microphysical sink terms+transport + #---------------------------------------------- + if jn == jm: + for jl in range(kidia, kfdia + 1): + zqlhs[jm - 1, jn - 1, jl - 1] = 1.0 + zfallsink[jm - 1, jl - 1] + for jo in range(1, nclv + 1): + zqlhs[jm - 1, jn - 1, jl - 1] = zqlhs[jm - 1, jn - 1, jl - 1] + zsolqb[jn - 1, jo - 1, jl - 1] + #------------------------------------------ + # non-diagonals: microphysical source terms + #------------------------------------------ + else: + for jl in range(kidia, kfdia + 1): + zqlhs[jm - 1, jn - 1, jl - 1] = -zsolqb[jm - 1, jn - 1, jl - 1] # here is the delta T - missing from doc. + + #------------------------ + # set the RHS of equation + #------------------------ + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + #--------------------------------- + # sum the explicit source and sink + #--------------------------------- + zexplicit = 0.0 + for jn in range(1, nclv + 1): + zexplicit = zexplicit + zsolqa[jn - 1, jm - 1, jl - 1] # sum over middle index + zqxn[jm - 1, jl - 1] = zqx[jm - 1, jk - 1, jl - 1] + zexplicit + + #----------------------------------- + # *** solve by LU decomposition: *** + #----------------------------------- + + # Note: This fast way of solving NCLVxNCLV system + # assumes a good behaviour (i.e. non-zero diagonal + # terms with comparable orders) of the matrix stored + # in ZQLHS. For the moment this is the case but + # be aware to preserve it when doing eventual + # modifications. + + # Non pivoting recursive factorization + for jn in range(1, nclv - 1 + 1): + # number of steps + for jm in range(jn + 1, nclv + 1): + # row index + for jl in range(kidia, kfdia + 1): + zqlhs[jn - 1, jm - 1, jl - 1] = zqlhs[jn - 1, jm - 1, jl - 1] / zqlhs[jn - 1, jn - 1, jl - 1] + for ik in range(jn + 1, nclv + 1): + # column index + for jl in range(kidia, kfdia + 1): + zqlhs[ik - 1, jm - 1, jl - 1] = zqlhs[ik - 1, jm - 1, jl - 1] - zqlhs[jn - 1, jm - 1, jl - 1]*zqlhs[ik - 1, jn - 1, jl - 1] + + # Backsubstitution + # step 1 + for jn in range(2, nclv + 1): + for jm in range(1, jn - 1 + 1): + for jl in range(kidia, kfdia + 1): + zqxn[jn - 1, jl - 1] = zqxn[jn - 1, jl - 1] - zqlhs[jm - 1, jn - 1, jl - 1]*zqxn[jm - 1, jl - 1] + # step 2 + for jl in range(kidia, kfdia + 1): + zqxn[nclv - 1, jl - 1] = zqxn[nclv - 1, jl - 1] / zqlhs[nclv - 1, nclv - 1, jl - 1] + for jn in range(nclv - 1, 1 + -1, -1): + for jm in range(jn + 1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zqxn[jn - 1, jl - 1] = zqxn[jn - 1, jl - 1] - zqlhs[jm - 1, jn - 1, jl - 1]*zqxn[jm - 1, jl - 1] + for jl in range(kidia, kfdia + 1): + zqxn[jn - 1, jl - 1] = zqxn[jn - 1, jl - 1] / zqlhs[jn - 1, jn - 1, jl - 1] + + # Ensure no small values (including negatives) remain in cloud variables nor + # precipitation rates. + # Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for jn in range(1, nclv - 1 + 1): + for jl in range(kidia, kfdia + 1): + if zqxn[jn - 1, jl - 1] < zepsec: + zqxn[ncldqv - 1, jl - 1] = zqxn[ncldqv - 1, jl - 1] + zqxn[jn - 1, jl - 1] + zqxn[jn - 1, jl - 1] = 0.0 + + #-------------------------------- + # variables needed for next level + #-------------------------------- + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zqxnm1[jm - 1, jl - 1] = zqxn[jm - 1, jl - 1] + zqxn2d[jm - 1, jk - 1, jl - 1] = zqxn[jm - 1, jl - 1] + + #------------------------------------------------------------------------ + # 5.3 Precipitation/sedimentation fluxes to next level + # diagnostic precipitation fluxes + # It is this scaled flux that must be used for source to next layer + #------------------------------------------------------------------------ + + for jm in range(1, nclv + 1): + for jl in range(kidia, kfdia + 1): + zpfplsx[jm - 1, jk + 1 - 1, jl - 1] = zfallsink[jm - 1, jl - 1]*zqxn[jm - 1, jl - 1]*zrdtgdp[jl - 1] + + # Ensure precipitation fraction is zero if no precipitation + for jl in range(kidia, kfdia + 1): + zqpretot[jl - 1] = zpfplsx[ncldqs - 1, jk + 1 - 1, jl - 1] + zpfplsx[ncldqr - 1, jk + 1 - 1, jl - 1] + for jl in range(kidia, kfdia + 1): + if zqpretot[jl - 1] < zepsec: + zcovptot[jl - 1] = 0.0 + + ####################################################################### + # 6 *** UPDATE TENDANCIES *** + ####################################################################### + + #-------------------------------- + # 6.1 Temperature and CLV budgets + #-------------------------------- + + for jm in range(1, nclv - 1 + 1): + for jl in range(kidia, kfdia + 1): + + # calculate fluxes in and out of box for conservation of TL + zfluxq[jm - 1, jl - 1] = zpsupsatsrce[jm - 1, jl - 1] + zconvsrce[jm - 1, jl - 1] + zfallsrce[jm - 1, jl - 1] - (zfallsink[jm - 1, jl - 1] + zconvsink[jm - 1, jl - 1])*zqxn[jm - 1, jl - 1] + + if iphase[jm - 1] == 1: + for jl in range(kidia, kfdia + 1): + tendency_loc_t[jk - 1, jl - 1] = tendency_loc_t[jk - 1, jl - 1] + ydthf.ralvdcp*(zqxn[jm - 1, jl - 1] - zqx[jm - 1, jk - 1, jl - 1] - zfluxq[jm - 1, jl - 1])*zqtmst + + if iphase[jm - 1] == 2: + for jl in range(kidia, kfdia + 1): + tendency_loc_t[jk - 1, jl - 1] = tendency_loc_t[jk - 1, jl - 1] + ydthf.ralsdcp*(zqxn[jm - 1, jl - 1] - zqx[jm - 1, jk - 1, jl - 1] - zfluxq[jm - 1, jl - 1])*zqtmst + + #---------------------------------------------------------------------- + # New prognostic tendencies - ice,liquid rain,snow + # Note: CLV arrays use PCLV in calculation of tendency while humidity + # uses ZQX. This is due to clipping at start of cloudsc which + # include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + #---------------------------------------------------------------------- + for jl in range(kidia, kfdia + 1): + tendency_loc_cld[jm - 1, jk - 1, jl - 1] = tendency_loc_cld[jm - 1, jk - 1, jl - 1] + (zqxn[jm - 1, jl - 1] - zqx0[jm - 1, jk - 1, jl - 1])*zqtmst + + + for jl in range(kidia, kfdia + 1): + #---------------------- + # 6.2 Humidity budget + #---------------------- + tendency_loc_q[jk - 1, jl - 1] = tendency_loc_q[jk - 1, jl - 1] + (zqxn[ncldqv - 1, jl - 1] - zqx[ncldqv - 1, jk - 1, jl - 1])*zqtmst + + #------------------- + # 6.3 cloud cover + #----------------------- + tendency_loc_a[jk - 1, jl - 1] = tendency_loc_a[jk - 1, jl - 1] + zda[jl - 1]*zqtmst + + #-------------------------------------------------- + # Copy precipitation fraction into output variable + #------------------------------------------------- + for jl in range(kidia, kfdia + 1): + pcovptot[jk - 1, jl - 1] = zcovptot[jl - 1] + + # on vertical level JK + #---------------------------------------------------------------------- + # END OF VERTICAL LOOP + #---------------------------------------------------------------------- + + ####################################################################### + # 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + ####################################################################### + + #-------------------------------------------------------------------- + # Copy general precip arrays back into PFP arrays for GRIB archiving + # Add rain and liquid fluxes, ice and snow fluxes + #-------------------------------------------------------------------- + for jk in range(1, klev + 1 + 1): + for jl in range(kidia, kfdia + 1): + pfplsl[jk - 1, jl - 1] = zpfplsx[ncldqr - 1, jk - 1, jl - 1] + zpfplsx[ncldql - 1, jk - 1, jl - 1] + pfplsn[jk - 1, jl - 1] = zpfplsx[ncldqs - 1, jk - 1, jl - 1] + zpfplsx[ncldqi - 1, jk - 1, jl - 1] + + #-------- + # Fluxes: + #-------- + for jl in range(kidia, kfdia + 1): + pfsqlf[1 - 1, jl - 1] = 0.0 + pfsqif[1 - 1, jl - 1] = 0.0 + pfsqrf[1 - 1, jl - 1] = 0.0 + pfsqsf[1 - 1, jl - 1] = 0.0 + pfcqlng[1 - 1, jl - 1] = 0.0 + pfcqnng[1 - 1, jl - 1] = 0.0 + pfcqrng[1 - 1, jl - 1] = 0.0 #rain + pfcqsng[1 - 1, jl - 1] = 0.0 #snow + # fluxes due to turbulence + pfsqltur[1 - 1, jl - 1] = 0.0 + pfsqitur[1 - 1, jl - 1] = 0.0 + + for jk in range(1, klev + 1): + for jl in range(kidia, kfdia + 1): + + zgdph_r = -zrg_r*(paph[jk + 1 - 1, jl - 1] - paph[jk - 1, jl - 1])*zqtmst + pfsqlf[jk + 1 - 1, jl - 1] = pfsqlf[jk - 1, jl - 1] + pfsqif[jk + 1 - 1, jl - 1] = pfsqif[jk - 1, jl - 1] + pfsqrf[jk + 1 - 1, jl - 1] = pfsqlf[jk - 1, jl - 1] + pfsqsf[jk + 1 - 1, jl - 1] = pfsqif[jk - 1, jl - 1] + pfcqlng[jk + 1 - 1, jl - 1] = pfcqlng[jk - 1, jl - 1] + pfcqnng[jk + 1 - 1, jl - 1] = pfcqnng[jk - 1, jl - 1] + pfcqrng[jk + 1 - 1, jl - 1] = pfcqlng[jk - 1, jl - 1] + pfcqsng[jk + 1 - 1, jl - 1] = pfcqnng[jk - 1, jl - 1] + pfsqltur[jk + 1 - 1, jl - 1] = pfsqltur[jk - 1, jl - 1] + pfsqitur[jk + 1 - 1, jl - 1] = pfsqitur[jk - 1, jl - 1] + + zalfaw = zfoealfa[jk - 1, jl - 1] + + # Liquid , LS scheme minus detrainment + pfsqlf[jk + 1 - 1, jl - 1] = pfsqlf[jk + 1 - 1, jl - 1] + (zqxn2d[ncldql - 1, jk - 1, jl - 1] - zqx0[ncldql - 1, jk - 1, jl - 1] + pvfl[jk - 1, jl - 1]*ptsphy - zalfaw*plude[jk - 1, jl - 1])*zgdph_r + # liquid, negative numbers + pfcqlng[jk + 1 - 1, jl - 1] = pfcqlng[jk + 1 - 1, jl - 1] + zlneg[ncldql - 1, jk - 1, jl - 1]*zgdph_r + + # liquid, vertical diffusion + pfsqltur[jk + 1 - 1, jl - 1] = pfsqltur[jk + 1 - 1, jl - 1] + pvfl[jk - 1, jl - 1]*ptsphy*zgdph_r + + # Rain, LS scheme + pfsqrf[jk + 1 - 1, jl - 1] = pfsqrf[jk + 1 - 1, jl - 1] + (zqxn2d[ncldqr - 1, jk - 1, jl - 1] - zqx0[ncldqr - 1, jk - 1, jl - 1])*zgdph_r + # rain, negative numbers + pfcqrng[jk + 1 - 1, jl - 1] = pfcqrng[jk + 1 - 1, jl - 1] + zlneg[ncldqr - 1, jk - 1, jl - 1]*zgdph_r + + # Ice , LS scheme minus detrainment + pfsqif[jk + 1 - 1, jl - 1] = pfsqif[jk + 1 - 1, jl - 1] + (zqxn2d[ncldqi - 1, jk - 1, jl - 1] - zqx0[ncldqi - 1, jk - 1, jl - 1] + pvfi[jk - 1, jl - 1]*ptsphy - (1.0 - zalfaw)*plude[jk - 1, jl - 1])*zgdph_r + # ice, negative numbers + pfcqnng[jk + 1 - 1, jl - 1] = pfcqnng[jk + 1 - 1, jl - 1] + zlneg[ncldqi - 1, jk - 1, jl - 1]*zgdph_r + + # ice, vertical diffusion + pfsqitur[jk + 1 - 1, jl - 1] = pfsqitur[jk + 1 - 1, jl - 1] + pvfi[jk - 1, jl - 1]*ptsphy*zgdph_r + + # snow, LS scheme + pfsqsf[jk + 1 - 1, jl - 1] = pfsqsf[jk + 1 - 1, jl - 1] + (zqxn2d[ncldqs - 1, jk - 1, jl - 1] - zqx0[ncldqs - 1, jk - 1, jl - 1])*zgdph_r + # snow, negative numbers + pfcqsng[jk + 1 - 1, jl - 1] = pfcqsng[jk + 1 - 1, jl - 1] + zlneg[ncldqs - 1, jk - 1, jl - 1]*zgdph_r + + #----------------------------------- + # enthalpy flux due to precipitation + #----------------------------------- + for jk in range(1, klev + 1 + 1): + for jl in range(kidia, kfdia + 1): + pfhpsl[jk - 1, jl - 1] = -ydcst.rlvtt*pfplsl[jk - 1, jl - 1] + pfhpsn[jk - 1, jl - 1] = -ydcst.rlstt*pfplsn[jk - 1, jl - 1] + + #=============================================================================== + #IF (LHOOK) CALL DR_HOOK('CLOUDSC',1,ZHOOK_HANDLE) + return diff --git a/src/cloudsc_python/src/cloudscf2py/include/abor1.intfb.h b/src/cloudsc_python/src/cloudscf2py/include/abor1.intfb.h new file mode 100644 index 00000000..9b91b83b --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/abor1.intfb.h @@ -0,0 +1,14 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +INTERFACE +SUBROUTINE ABOR1(CDTEXT) +CHARACTER(LEN=*) :: CDTEXT +END SUBROUTINE ABOR1 +END INTERFACE diff --git a/src/cloudsc_python/src/cloudscf2py/include/fccld.base.h b/src/cloudsc_python/src/cloudscf2py/include/fccld.base.h new file mode 100644 index 00000000..32d3746f --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/fccld.base.h @@ -0,0 +1,27 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +!* +! ------------------------------------------------------------------ +! This COMDECK defines functions to be used in the cloud scheme +! other than the standard saturation vapour pressure +! +! FKOOP modifies the ice saturation mixing ratio for homogeneous +! nucleation +! +! note: PTARE is temperature and is definited in frttre.h +! which MUST be included before this function block +! +! ********************************************** +! KOOP formula for homogeneous nucleation of ice +! ********************************************** +! +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOKOOP +FOKOOP (PTARE) = MIN( _PREFIX1_ RKOOP1 - _PREFIX1_ RKOOP2*PTARE,FOEELIQ(PTARE)/FOEEICE(PTARE)) diff --git a/src/cloudsc_python/src/cloudscf2py/include/fccld.func.h b/src/cloudsc_python/src/cloudscf2py/include/fccld.func.h new file mode 100644 index 00000000..af5dcca1 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/fccld.func.h @@ -0,0 +1,27 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +!* +! ------------------------------------------------------------------ +! This COMDECK defines functions to be used in the cloud scheme +! other than the standard saturation vapour pressure +! +! FKOOP modifies the ice saturation mixing ratio for homogeneous +! nucleation +! +! note: PTARE is temperature and is definited in frttre.h +! which MUST be included before this function block +! +! ********************************************** +! KOOP formula for homogeneous nucleation of ice +! ********************************************** +! +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOKOOP +FOKOOP (PTARE) = MIN(RKOOP1-RKOOP2*PTARE,FOEELIQ(PTARE)/FOEEICE(PTARE)) diff --git a/src/cloudsc_python/src/cloudscf2py/include/fccld.ydthf.h b/src/cloudsc_python/src/cloudscf2py/include/fccld.ydthf.h new file mode 100644 index 00000000..e0ca36a0 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/fccld.ydthf.h @@ -0,0 +1,3 @@ +#define _PREFIX1_ YDTHF% +#include "fccld.base.h" +#undef _PREFIX1_ diff --git a/src/cloudsc_python/src/cloudscf2py/include/fcttre.base.h b/src/cloudsc_python/src/cloudscf2py/include/fcttre.base.h new file mode 100644 index 00000000..88b6701c --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/fcttre.base.h @@ -0,0 +1,172 @@ +!* +! ------------------------------------------------------------------ + +! This COMDECK includes the Thermodynamical functions for the cy39 +! ECMWF Physics package. +! Consistent with YOMCST Basic physics constants, assuming the +! partial pressure of water vapour is given by a first order +! Taylor expansion of Qs(T) w.r.t. to Temperature, using constants +! in YOETHF +! Two sets of functions are available. In the first set only the +! cases water or ice are distinguished by temperature. This set +! consists of the functions FOEDELTA,FOEEW,FOEDE and FOELH. +! The second set considers, besides the two cases water and ice +! also a mix of both for the temperature range _PREFIX2_ RTICE < T < _PREFIX2_ RTWAT. +! This set contains FOEALFA,FOEEWM,FOEDEM,FOELDCPM and FOELHM. +! FKOOP modifies the ice saturation mixing ratio for homogeneous +! nucleation. FOE_DEWM_DT provides an approximate first derivative +! of FOEEWM. + +! Depending on the consideration of mixed phases either the first +! set (e.g. surface, post-processing) or the second set +! (e.g. clouds, condensation, convection) should be used. + +! ------------------------------------------------------------------ +! ***************************************************************** + +! NO CONSIDERATION OF MIXED PHASES + +! ***************************************************************** +REAL(KIND=JPRB) :: FOEDELTA +REAL(KIND=JPRB) :: PTARE +FOEDELTA (PTARE) = MAX (0.0_JPRB,SIGN(1.0_JPRB,PTARE- _PREFIX1_ RTT)) + +! FOEDELTA = 1 water +! FOEDELTA = 0 ice + +! THERMODYNAMICAL FUNCTIONS . + +! Pressure of water vapour at saturation +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEEW,FOEDE,FOEDESU,FOELH,FOELDCP +FOEEW ( PTARE ) = _PREFIX2_ R2ES*EXP (& + &( _PREFIX2_ R3LES*FOEDELTA(PTARE)+ _PREFIX2_ R3IES*(1.0_JPRB-FOEDELTA(PTARE)))*(PTARE- _PREFIX1_ RTT)& +&/ (PTARE-( _PREFIX2_ R4LES*FOEDELTA(PTARE)+ _PREFIX2_ R4IES*(1.0_JPRB-FOEDELTA(PTARE))))) + +FOEDE ( PTARE ) = & + &(FOEDELTA(PTARE)* _PREFIX2_ R5ALVCP+(1.0_JPRB-FOEDELTA(PTARE))* _PREFIX2_ R5ALSCP)& +&/ (PTARE-( _PREFIX2_ R4LES*FOEDELTA(PTARE)+ _PREFIX2_ R4IES*(1.0_JPRB-FOEDELTA(PTARE))))**2 + +FOEDESU ( PTARE ) = & + &(FOEDELTA(PTARE)* _PREFIX2_ R5LES+(1.0_JPRB-FOEDELTA(PTARE))* _PREFIX2_ R5IES)& +&/ (PTARE-( _PREFIX2_ R4LES*FOEDELTA(PTARE)+ _PREFIX2_ R4IES*(1.0_JPRB-FOEDELTA(PTARE))))**2 + +FOELH ( PTARE ) =& + &FOEDELTA(PTARE)* _PREFIX1_ RLVTT + (1.0_JPRB-FOEDELTA(PTARE))* _PREFIX1_ RLSTT + +FOELDCP ( PTARE ) = & + &FOEDELTA(PTARE)* _PREFIX2_ RALVDCP + (1.0_JPRB-FOEDELTA(PTARE))* _PREFIX2_ RALSDCP + +! ***************************************************************** + +! CONSIDERATION OF MIXED PHASES + +! ***************************************************************** + +! FOEALFA is calculated to distinguish the three cases: + +! FOEALFA=1 water phase +! FOEALFA=0 ice phase +! 0 < FOEALFA < 1 mixed phase + +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEALFA +FOEALFA (PTARE) = MIN(1.0_JPRB,((MAX( _PREFIX2_ RTICE,MIN( _PREFIX2_ RTWAT,PTARE))- _PREFIX2_ RTICE)& + &* _PREFIX2_ RTWAT_RTICE_R)**2) + + +! Pressure of water vapour at saturation +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEEWM,FOEDEM,FOELDCPM,FOELHM,FOE_DEWM_DT +FOEEWM ( PTARE ) = _PREFIX2_ R2ES *& + &(FOEALFA(PTARE)*EXP( _PREFIX2_ R3LES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4LES))+& + &(1.0_JPRB-FOEALFA(PTARE))*EXP( _PREFIX2_ R3IES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4IES))) + +FOE_DEWM_DT( PTARE ) = _PREFIX2_ R2ES * ( & + & _PREFIX2_ R3LES*FOEALFA(PTARE)*EXP( _PREFIX2_ R3LES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4LES)) & + & *( _PREFIX1_ RTT- _PREFIX2_ R4LES)/(PTARE- _PREFIX2_ R4LES)**2 + & + & _PREFIX2_ R3IES*(1.0-FOEALFA(PTARE))*EXP( _PREFIX2_ R3IES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4IES)) & + & *( _PREFIX1_ RTT- _PREFIX2_ R4IES)/(PTARE- _PREFIX2_ R4IES)**2) + +FOEDEM ( PTARE ) = FOEALFA(PTARE)* _PREFIX2_ R5ALVCP*(1.0_JPRB/(PTARE- _PREFIX2_ R4LES)**2)+& + &(1.0_JPRB-FOEALFA(PTARE))* _PREFIX2_ R5ALSCP*(1.0_JPRB/(PTARE- _PREFIX2_ R4IES)**2) + +FOELDCPM ( PTARE ) = FOEALFA(PTARE)* _PREFIX2_ RALVDCP+& + &(1.0_JPRB-FOEALFA(PTARE))* _PREFIX2_ RALSDCP + +FOELHM ( PTARE ) =& + &FOEALFA(PTARE)* _PREFIX1_ RLVTT+(1.0_JPRB-FOEALFA(PTARE))* _PREFIX1_ RLSTT + + +! Temperature normalization for humidity background change of variable +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOETB +FOETB ( PTARE )=FOEALFA(PTARE)* _PREFIX2_ R3LES*( _PREFIX1_ RTT- _PREFIX2_ R4LES)*(1.0_JPRB/(PTARE- _PREFIX2_ R4LES)**2)+& + &(1.0_JPRB-FOEALFA(PTARE))* _PREFIX2_ R3IES*( _PREFIX1_ RTT- _PREFIX2_ R4IES)*(1.0_JPRB/(PTARE- _PREFIX2_ R4IES)**2) + +! ------------------------------------------------------------------ +! ***************************************************************** + +! CONSIDERATION OF DIFFERENT MIXED PHASE FOR CONV + +! ***************************************************************** + +! FOEALFCU is calculated to distinguish the three cases: + +! FOEALFCU=1 water phase +! FOEALFCU=0 ice phase +! 0 < FOEALFCU < 1 mixed phase + +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEALFCU +FOEALFCU (PTARE) = MIN(1.0_JPRB,((MAX( _PREFIX2_ RTICECU,MIN( _PREFIX2_ RTWAT,PTARE))& +&- _PREFIX2_ RTICECU)* _PREFIX2_ RTWAT_RTICECU_R)**2) + + +! Pressure of water vapour at saturation +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEEWMCU,FOEDEMCU,FOELDCPMCU,FOELHMCU +FOEEWMCU ( PTARE ) = _PREFIX2_ R2ES *& + &(FOEALFCU(PTARE)*EXP( _PREFIX2_ R3LES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4LES))+& + &(1.0_JPRB-FOEALFCU(PTARE))*EXP( _PREFIX2_ R3IES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4IES))) + +FOEDEMCU ( PTARE )=FOEALFCU(PTARE)* _PREFIX2_ R5ALVCP*(1.0_JPRB/(PTARE- _PREFIX2_ R4LES)**2)+& + &(1.0_JPRB-FOEALFCU(PTARE))* _PREFIX2_ R5ALSCP*(1.0_JPRB/(PTARE- _PREFIX2_ R4IES)**2) + +FOELDCPMCU ( PTARE ) = FOEALFCU(PTARE)* _PREFIX2_ RALVDCP+& + &(1.0_JPRB-FOEALFCU(PTARE))* _PREFIX2_ RALSDCP + +FOELHMCU ( PTARE ) =& + &FOEALFCU(PTARE)* _PREFIX1_ RLVTT+(1.0_JPRB-FOEALFCU(PTARE))* _PREFIX1_ RLSTT +! ------------------------------------------------------------------ + +! Pressure of water vapour at saturation +! This one is for the WMO definition of saturation, i.e. always +! with respect to water. +! +! Duplicate to FOEELIQ and FOEEICE for separate ice variable +! FOEELIQ always respect to water +! FOEEICE always respect to ice +! (could use FOEEW and FOEEWMO, but naming convention unclear) +! FOELSON returns e wrt liquid water using D Sonntag (1994, Met. Zeit.) +! - now recommended for use with radiosonde data (WMO CIMO guide, 2014) +! unlike the FOEE functions does not include 1/( _PREFIX1_ RETV+1.0_JPRB) factor + +REAL(KIND=JPRB) :: FOEEWMO, FOEELIQ, FOEEICE, FOELSON +FOEEWMO( PTARE ) = _PREFIX2_ R2ES*EXP( _PREFIX2_ R3LES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4LES)) +FOEELIQ( PTARE ) = _PREFIX2_ R2ES*EXP( _PREFIX2_ R3LES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4LES)) +FOEEICE( PTARE ) = _PREFIX2_ R2ES*EXP( _PREFIX2_ R3IES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4IES)) +FOELSON( PTARE ) = EXP( -6096.9385_JPRB/PTARE + 21.2409642_JPRB & + - 2.711193E-2_JPRB * PTARE & + + 1.673952E-5_JPRB * PTARE**2 & + + 2.433502_JPRB * LOG(PTARE)) + +REAL(KIND=JPRB) :: FOEEWM_V,FOEEWMCU_V,FOELES_V,FOEIES_V +REAL(KIND=JPRB) :: EXP1,EXP2 + FOELES_V(PTARE)= _PREFIX2_ R3LES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4LES) + FOEIES_V(PTARE)= _PREFIX2_ R3IES*(PTARE- _PREFIX1_ RTT)/(PTARE- _PREFIX2_ R4IES) + FOEEWM_V( PTARE,EXP1,EXP2 )= _PREFIX2_ R2ES*(FOEALFA(PTARE)*EXP1+ & + & (1.0_JPRB-FOEALFA(PTARE))*EXP2) + FOEEWMCU_V ( PTARE,EXP1,EXP2 ) = _PREFIX2_ R2ES*(FOEALFCU(PTARE)*EXP1+& + &(1.0_JPRB-FOEALFCU(PTARE))*EXP2) + diff --git a/src/cloudsc_python/src/cloudscf2py/include/fcttre.func.h b/src/cloudsc_python/src/cloudscf2py/include/fcttre.func.h new file mode 100644 index 00000000..96473150 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/fcttre.func.h @@ -0,0 +1,174 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +!* +! ------------------------------------------------------------------ + +! This COMDECK includes the Thermodynamical functions for the cy39 +! ECMWF Physics package. +! Consistent with YOMCST Basic physics constants, assuming the +! partial pressure of water vapour is given by a first order +! Taylor expansion of Qs(T) w.r.t. to Temperature, using constants +! in YOETHF +! Two sets of functions are available. In the first set only the +! cases water or ice are distinguished by temperature. This set +! consists of the functions FOEDELTA,FOEEW,FOEDE and FOELH. +! The second set considers, besides the two cases water and ice +! also a mix of both for the temperature range RTICE < T < RTWAT. +! This set contains FOEALFA,FOEEWM,FOEDEM,FOELDCPM and FOELHM. +! FKOOP modifies the ice saturation mixing ratio for homogeneous +! nucleation. FOE_DEWM_DT provides an approximate first derivative +! of FOEEWM. + +! Depending on the consideration of mixed phases either the first +! set (e.g. surface, post-processing) or the second set +! (e.g. clouds, condensation, convection) should be used. + +! ------------------------------------------------------------------ +! ***************************************************************** + +! NO CONSIDERATION OF MIXED PHASES + +! ***************************************************************** +REAL(KIND=JPRB) :: FOEDELTA +REAL(KIND=JPRB) :: PTARE +FOEDELTA (PTARE) = MAX (0.0_JPRB,SIGN(1.0_JPRB,PTARE-RTT)) + +! FOEDELTA = 1 water +! FOEDELTA = 0 ice + +! THERMODYNAMICAL FUNCTIONS . + +! Pressure of water vapour at saturation +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEEW,FOEDE,FOEDESU,FOELH,FOELDCP +FOEEW ( PTARE ) = R2ES*EXP (& + &(R3LES*FOEDELTA(PTARE)+R3IES*(1.0_JPRB-FOEDELTA(PTARE)))*(PTARE-RTT)& +&/ (PTARE-(R4LES*FOEDELTA(PTARE)+R4IES*(1.0_JPRB-FOEDELTA(PTARE))))) + +FOEDE ( PTARE ) = & + &(FOEDELTA(PTARE)*R5ALVCP+(1.0_JPRB-FOEDELTA(PTARE))*R5ALSCP)& +&/ (PTARE-(R4LES*FOEDELTA(PTARE)+R4IES*(1.0_JPRB-FOEDELTA(PTARE))))**2 + +FOEDESU ( PTARE ) = & + &(FOEDELTA(PTARE)*R5LES+(1.0_JPRB-FOEDELTA(PTARE))*R5IES)& +&/ (PTARE-(R4LES*FOEDELTA(PTARE)+R4IES*(1.0_JPRB-FOEDELTA(PTARE))))**2 + +FOELH ( PTARE ) =& + &FOEDELTA(PTARE)*RLVTT + (1.0_JPRB-FOEDELTA(PTARE))*RLSTT + +FOELDCP ( PTARE ) = & + &FOEDELTA(PTARE)*RALVDCP + (1.0_JPRB-FOEDELTA(PTARE))*RALSDCP + +! ***************************************************************** + +! CONSIDERATION OF MIXED PHASES + +! ***************************************************************** + +! FOEALFA is calculated to distinguish the three cases: + +! FOEALFA=1 water phase +! FOEALFA=0 ice phase +! 0 < FOEALFA < 1 mixed phase + +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEALFA +FOEALFA (PTARE) = MIN(1.0_JPRB,((MAX(RTICE,MIN(RTWAT,PTARE))-RTICE)& + &*RTWAT_RTICE_R)**2) + + +! Pressure of water vapour at saturation +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEEWM,FOEDEM,FOELDCPM,FOELHM,FOE_DEWM_DT +FOEEWM ( PTARE ) = R2ES *& + &(FOEALFA(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))+& + &(1.0_JPRB-FOEALFA(PTARE))*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES))) + +FOE_DEWM_DT( PTARE ) = R2ES * ( & + & R3LES*FOEALFA(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES)) & + & *(RTT-R4LES)/(PTARE-R4LES)**2 + & + & R3IES*(1.0-FOEALFA(PTARE))*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES)) & + & *(RTT-R4IES)/(PTARE-R4IES)**2) + +FOEDEM ( PTARE ) = FOEALFA(PTARE)*R5ALVCP*(1.0_JPRB/(PTARE-R4LES)**2)+& + &(1.0_JPRB-FOEALFA(PTARE))*R5ALSCP*(1.0_JPRB/(PTARE-R4IES)**2) + +FOELDCPM ( PTARE ) = FOEALFA(PTARE)*RALVDCP+& + &(1.0_JPRB-FOEALFA(PTARE))*RALSDCP + +FOELHM ( PTARE ) =& + &FOEALFA(PTARE)*RLVTT+(1.0_JPRB-FOEALFA(PTARE))*RLSTT + + +! Temperature normalization for humidity background change of variable +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOETB +FOETB ( PTARE )=FOEALFA(PTARE)*R3LES*(RTT-R4LES)*(1.0_JPRB/(PTARE-R4LES)**2)+& + &(1.0_JPRB-FOEALFA(PTARE))*R3IES*(RTT-R4IES)*(1.0_JPRB/(PTARE-R4IES)**2) + +! ------------------------------------------------------------------ +! ***************************************************************** + +! CONSIDERATION OF DIFFERENT MIXED PHASE FOR CONV + +! ***************************************************************** + +! FOEALFCU is calculated to distinguish the three cases: + +! FOEALFCU=1 water phase +! FOEALFCU=0 ice phase +! 0 < FOEALFCU < 1 mixed phase + +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEALFCU +FOEALFCU (PTARE) = MIN(1.0_JPRB,((MAX(RTICECU,MIN(RTWAT,PTARE))& +&-RTICECU)*RTWAT_RTICECU_R)**2) + + +! Pressure of water vapour at saturation +! INPUT : PTARE = TEMPERATURE +REAL(KIND=JPRB) :: FOEEWMCU,FOEDEMCU,FOELDCPMCU,FOELHMCU +FOEEWMCU ( PTARE ) = R2ES *& + &(FOEALFCU(PTARE)*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES))+& + &(1.0_JPRB-FOEALFCU(PTARE))*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES))) + +FOEDEMCU ( PTARE )=FOEALFCU(PTARE)*R5ALVCP*(1.0_JPRB/(PTARE-R4LES)**2)+& + &(1.0_JPRB-FOEALFCU(PTARE))*R5ALSCP*(1.0_JPRB/(PTARE-R4IES)**2) + +FOELDCPMCU ( PTARE ) = FOEALFCU(PTARE)*RALVDCP+& + &(1.0_JPRB-FOEALFCU(PTARE))*RALSDCP + +FOELHMCU ( PTARE ) =& + &FOEALFCU(PTARE)*RLVTT+(1.0_JPRB-FOEALFCU(PTARE))*RLSTT +! ------------------------------------------------------------------ + +! Pressure of water vapour at saturation +! This one is for the WMO definition of saturation, i.e. always +! with respect to water. +! +! Duplicate to FOEELIQ and FOEEICE for separate ice variable +! FOEELIQ always respect to water +! FOEEICE always respect to ice +! (could use FOEEW and FOEEWMO, but naming convention unclear) + +REAL(KIND=JPRB) :: FOEEWMO, FOEELIQ, FOEEICE +FOEEWMO( PTARE ) = R2ES*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES)) +FOEELIQ( PTARE ) = R2ES*EXP(R3LES*(PTARE-RTT)/(PTARE-R4LES)) +FOEEICE( PTARE ) = R2ES*EXP(R3IES*(PTARE-RTT)/(PTARE-R4IES)) + +REAL(KIND=JPRB) :: FOEEWM_V,FOEEWMCU_V,FOELES_V,FOEIES_V +REAL(KIND=JPRB) :: EXP1,EXP2 + FOELES_V(PTARE)=R3LES*(PTARE-RTT)/(PTARE-R4LES) + FOEIES_V(PTARE)=R3IES*(PTARE-RTT)/(PTARE-R4IES) + FOEEWM_V( PTARE,EXP1,EXP2 )=R2ES*(FOEALFA(PTARE)*EXP1+ & + & (1.0_JPRB-FOEALFA(PTARE))*EXP2) + FOEEWMCU_V ( PTARE,EXP1,EXP2 ) = R2ES*(FOEALFCU(PTARE)*EXP1+& + &(1.0_JPRB-FOEALFCU(PTARE))*EXP2) + diff --git a/src/cloudsc_python/src/cloudscf2py/include/fcttre.ycst.h b/src/cloudsc_python/src/cloudscf2py/include/fcttre.ycst.h new file mode 100644 index 00000000..cc234120 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/include/fcttre.ycst.h @@ -0,0 +1,5 @@ +#define _PREFIX1_ YDCST% +#define _PREFIX2_ YDTHF% +#include "fcttre.base.h" +#undef _PREFIX1_ +#undef _PREFIX2_ diff --git a/src/cloudsc_python/src/cloudscf2py/inputs.py b/src/cloudsc_python/src/cloudscf2py/inputs.py new file mode 100644 index 00000000..e2986b9e --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/inputs.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +# (C) Copyright 2018- ECMWF. + +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + + +import h5py +import numpy as np + +from pathlib import Path +from collections import OrderedDict + + +NCLV = 5 # number of microphysics variables + + +def load_input_fields(path, transpose=False): + """ + """ + fields = OrderedDict() + + argnames = [ + 'PT', 'PQ', + 'TENDENCY_TMP_T', 'TENDENCY_TMP_Q', 'TENDENCY_TMP_A', 'TENDENCY_TMP_CLD', + 'PVFA', 'PVFL', 'PVFI', 'PDYNA', 'PDYNL', 'PDYNI', 'PHRSW', + 'PHRLW', 'PVERVEL', 'PAP', 'PAPH', 'PLSM', 'LDCUM', 'KTYPE', + 'PLU', 'PLUDE', 'PSNDE', 'PMFU', 'PMFD', 'PA', 'PCLV', + 'PSUPSAT', 'PLCRIT_AER', 'PICRIT_AER', 'PRE_ICE', 'PCCN', 'PNICE' + ] + + with h5py.File(path, 'r') as f: + fields['KLON'] = f['KLON'][0] + fields['KLEV'] = f['KLEV'][0] + fields['PTSPHY'] = f['PTSPHY'][0] + + klon = fields['KLON'] + klev = fields['KLEV'] + + for argname in argnames: + fields[argname] = np.ascontiguousarray(f[argname]) + + fields['TENDENCY_LOC_A'] = np.ndarray(order="C", shape=(klev, klon)) + fields['TENDENCY_LOC_T'] = np.ndarray(order="C", shape=(klev, klon)) + fields['TENDENCY_LOC_Q'] = np.ndarray(order="C", shape=(klev, klon)) + fields['TENDENCY_LOC_CLD'] = np.ndarray(order="C", shape=(NCLV, klev, klon)) + fields['PCOVPTOT'] = np.ndarray(order="C", shape=(klev, klon)) + fields['PRAINFRAC_TOPRFZ'] = np.ndarray(order="C", shape=(klon,)) + + fields['PFSQLF'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFSQIF'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFCQNNG'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFCQLNG'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFSQRF'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFSQSF'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFCQRNG'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFCQSNG'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFSQLTUR'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFSQITUR'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFPLSL'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFPLSN'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFHPSL'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields['PFHPSN'] = np.ndarray(order="C", shape=(klev+1, klon)) + + return fields + + +def load_input_parameters(path): + class TECLDP: + pass + yrecldp = TECLDP() + + class TEPHLI: + pass + yrephli = TEPHLI() + + class TMCST: + pass + yrmcst = TMCST() + + class TETHF: + pass + yrethf = TETHF() + + class TECLD: + pass + yrecld = TECLD() + + with h5py.File(path, 'r') as f: + tecldp_keys = [k for k in f.keys() if 'YRECLDP' in k] + for k in tecldp_keys: + attrkey = k.replace('YRECLDP_', '').lower() + setattr(yrecldp, attrkey, f[k][0]) + tephli_keys = [k for k in f.keys() if 'YREPHLI' in k] + for k in tephli_keys: + attrkey = k.replace('YREPHLI_', '').lower() + setattr(yrephli, attrkey, f[k][0]) + + yrmcst.rg = f['RG'][0] + yrmcst.rd = f['RD'][0] + yrmcst.rcpd = f['RCPD'][0] + yrmcst.retv = f['RETV'][0] + yrmcst.rlvtt = f['RLVTT'][0] + yrmcst.rlstt = f['RLSTT'][0] + yrmcst.rlmlt = f['RLMLT'][0] + yrmcst.rtt = f['RTT'][0] + yrmcst.rv = f['RV'][0] + + yrethf.r2es = f['R2ES'][0] + yrethf.r3les = f['R3LES'][0] + yrethf.r3ies = f['R3IES'][0] + yrethf.r4les = f['R4LES'][0] + yrethf.r4ies = f['R4IES'][0] + yrethf.r5les = f['R5LES'][0] + yrethf.r5ies = f['R5IES'][0] + yrethf.r5alvcp = f['R5ALVCP'][0] + yrethf.r5alscp = f['R5ALSCP'][0] + yrethf.ralvdcp = f['RALVDCP'][0] + yrethf.ralsdcp = f['RALSDCP'][0] + yrethf.ralfdcp = f['RALFDCP'][0] + yrethf.rtwat = f['RTWAT'][0] + yrethf.rtice = f['RTICE'][0] + yrethf.rticecu = f['RTICECU'][0] + yrethf.rtwat_rtice_r = f['RTWAT_RTICE_R'][0] + yrethf.rtwat_rticecu_r = f['RTWAT_RTICECU_R'][0] + yrethf.rkoop1 = f['RKOOP1'][0] + yrethf.rkoop2 = f['RKOOP2'][0] + + yrethf.rvtmp2 = 0.0 + + klev = f['KLEV'][0] + pap = np.ascontiguousarray(f['PAP']) + paph = np.ascontiguousarray(f['PAPH']) + yrecld.ceta = np.ndarray(order="C", shape=(klev, )) + yrecld.ceta[:] = pap[0:,0] / paph[klev,0] + + yrephli.lphylin = True + + return yrecldp, yrmcst, yrethf, yrephli, yrecld + + +def load_reference_fields(path): + """ + """ + fields = OrderedDict() + + argnames = [ + 'PLUDE', 'PCOVPTOT', 'PFPLSL', 'PFPLSN', 'PFHPSL', 'PFHPSN', + 'TENDENCY_LOC_A', 'TENDENCY_LOC_Q', 'TENDENCY_LOC_T', 'TENDENCY_LOC_CLD', + ] + + with h5py.File(path, 'r') as f: + for argname in argnames: + fields[argname.lower()] = np.ascontiguousarray(f[argname]) + + return fields diff --git a/src/cloudsc_python/src/cloudscf2py/yoecldp.F90 b/src/cloudsc_python/src/cloudscf2py/yoecldp.F90 new file mode 100644 index 00000000..0e97a851 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/yoecldp.F90 @@ -0,0 +1,371 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE YOECLDP + +USE PARKIND1, ONLY : JPIM, JPRB +USE FILE_IO_MOD, ONLY : LOAD_SCALAR, LOAD_ARRAY + +IMPLICIT NONE + +SAVE + +! ----------------------------------------------------------------- +! ** YOECLDP - CONTROL PARAMETERS FOR PROGNOSTIC CLOUD SCHEME +! ----------------------------------------------------------------- + +! * E.C.M.W.F. PHYSICS PACKAGE * + +! C. JAKOB E.C.M.W.F. 94/02/07 +! A. Tompkins E.C.M.W.F. 2004/12/03 total water variance setup for +! moist advection-diffusion PBL +! A. Tompkins E.C.M.W.F. 2004/09/02 Aerosol in microphysics switches +! JJMorcrette ECMWF 20100813 Aerosol index for aerosol-cloud interactions +! R. Forbes ECMWF 20110301 Added ice deposition parameters +! R. Forbes ECMWF 20150115 Added additional ice, snow and rain parameters + +! NAME TYPE PURPOSE +! ---- ---- ------- + +! *RAMID* REAL BASE VALUE FOR CALCULATION OF RELATIVE +! HUMIDITY THRESHOLD FOR ONSET OF STRATIFORM +! CONDENSATION (TIEDTKE, 1993, EQUATION 24) +! *RCLDIFF* REAL DIFFUSION-COEFFICIENT FOR EVAPORATION BY +! TURBULENT MIXING (IBID., EQU. 30) +! *RCLDIFF_CONVI*REAL ENHANCEMENT FACTOR OF RCLDIFF FOR CONVECTION +! *RCLCRIT* REAL BASE VALUE OF CRITICAL CLOUD WATER CONTENT +! FOR CONVERSION TO RAIN (SUNDQUIST, 1988) +! *RCLCRIT_SEA* REAL BASE VALUE OF CRITICAL CLOUD WATER CONTENT FOR SEA +! *RCLCRIT_LAND* REAL BASE VALUE OF CRITICAL CLOUD WATER CONTENT FOR LAND +! *RKCONV* REAL BASE VALUE FOR CONVERSION COEFFICIENT (IBID.) +! *RPRC1* REAL COALESCENCE CONSTANT (IBID.) +! *RPRC2* REAL BERGERON-FINDEISEN CONSTANT (IBID.) +! *RCLDMAX* REAL MAXIMUM CLOUD WATER CONTENT +! *RPECONS* REAL EVAPORATION CONSTANT AFTER KESSLER +! (TIEDTKE, 1993, EQU.35) +! *RPRECRHMAX* REAL MAX THRESHOLD RH FOR EVAPORATION FOR ZERO COVER +! *RTAUMEL* REAL RELAXATION TIME FOR MELTING OF SNOW +! *RAMIN* REAL LIMIT FOR A +! *RLMIN* REAL LIMIT FOR L +! *RKOOPTAU* REAL TIMESCALE FOR ICE SUPERSATURATION REMOVAL +! *RVICE* REAL FIXED ICE FALLSPEED +! *RVRAIN* REAL FIXED RAIN FALLSPEED +! *RVSNOW* REAL FIXED SNOW FALLSPEED +! *RTHOMO* REAL TEMPERATURE THRESHOLD FOR SPONTANEOUS FREEZING OF LIQUID DROPLETS +! *RCOVPMIN* REAL MINIMUM PRECIPITATION COVERAGE REQUIRED FOR THE NEW PROGNOSTIC PRECIP +! *RCLDTOPP* REAL TOP PRESSURE FOR CLOUD CALCULATION +! *NCLDTOP* INTEGER TOP LEVEL FOR CLOUD CALCULATION +! *NSSOPT* INTEGER PARAMETRIZATION CHOICE FOR SUPERSATURATION +! *NCLDDIAG*INTEGER CONTROLS CLOUDSC DIAGNOSTICS IN PEXTRA +! *NCLV* INTEGER NUMBER OF PROGNOSTIC EQUATIONS IN CLOUDSC +! (INCLUDES WATER VAPOUR AS DUMMY VARIABLE) +! NAERCLD INT INDEX TO CONTROL SWITCHES FOR +! AEROSOL-MICROPHYSICS INTERACTION, LAER* +! NAECLxx INT INDEX OF GEMS AEROSOLS USED IN AEROSOL-CLOUD INTERACTIONS +! RCCN REAL DEFAULT CCN (CM-3) +! RNICE REAL DEFAULT ICE NUMBER CONCENTRATION (CM-3) +! LAERLIQAUTOLSP LOG AEROSOLS AFFECT RAIN AUTOCONVERSION IN LSP +! LAERLIQAUTOCP LOG AEROSOLS AFFECT RAIN AUTOCONVERSION IN CP +! LAERLIQCOLL LOG AEROSOLS AFFECT RAIN COLLECTION +! LAERICESED LOG AEROSOLS AFFECT ICE SEDIMENTATION +! LAERICEAUTO LOG AEROSOLS AFFECT ICE AUTOCONVERSION +! RCCNOM REAL CONSTANT IN MENON PARAM FOR ORGANIC MATTER -> CCN +! RCCNSS REAL CONSTANT IN MENON PARAM SEA SALT -> CCN +! RCCNSU REAL CONSTANT IN MENON PARAM FOR SULPHATE -> CCN +! RCLDTOPCF REAL Cloud fraction threshold that defines cloud top +! RDEPLIQREFRATE REAL Fraction of deposition rate in cloud top layer +! RDEPLIQREFDEPTH REAL Depth of supercooled liquid water layer (m) +! RVRFACTOR REAL KESSLER FACTOR=5.09E-3 FOR EVAPORATION OF CLEAR-SKY RAIN (KESSLER,1969) + +INTEGER(KIND=JPIM),PARAMETER :: NCLV=5 ! number of microphysics variables +INTEGER(KIND=JPIM),PARAMETER :: NCLDQL=1 ! liquid cloud water +INTEGER(KIND=JPIM),PARAMETER :: NCLDQI=2 ! ice cloud water +INTEGER(KIND=JPIM),PARAMETER :: NCLDQR=3 ! rain water +INTEGER(KIND=JPIM),PARAMETER :: NCLDQS=4 ! snow +INTEGER(KIND=JPIM),PARAMETER :: NCLDQV=5 ! vapour + + +TYPE :: TECLDP +REAL(KIND=JPRB) :: RAMID +REAL(KIND=JPRB) :: RCLDIFF +REAL(KIND=JPRB) :: RCLDIFF_CONVI +REAL(KIND=JPRB) :: RCLCRIT +REAL(KIND=JPRB) :: RCLCRIT_SEA +REAL(KIND=JPRB) :: RCLCRIT_LAND +REAL(KIND=JPRB) :: RKCONV +REAL(KIND=JPRB) :: RPRC1 +REAL(KIND=JPRB) :: RPRC2 +REAL(KIND=JPRB) :: RCLDMAX +REAL(KIND=JPRB) :: RPECONS +REAL(KIND=JPRB) :: RVRFACTOR +REAL(KIND=JPRB) :: RPRECRHMAX +REAL(KIND=JPRB) :: RTAUMEL +REAL(KIND=JPRB) :: RAMIN +REAL(KIND=JPRB) :: RLMIN +REAL(KIND=JPRB) :: RKOOPTAU +REAL(KIND=JPRB) :: RCLDTOPP +REAL(KIND=JPRB) :: RLCRITSNOW +REAL(KIND=JPRB) :: RSNOWLIN1 +REAL(KIND=JPRB) :: RSNOWLIN2 +REAL(KIND=JPRB) :: RICEHI1 +REAL(KIND=JPRB) :: RICEHI2 +REAL(KIND=JPRB) :: RICEINIT +REAL(KIND=JPRB) :: RVICE +REAL(KIND=JPRB) :: RVRAIN +REAL(KIND=JPRB) :: RVSNOW +REAL(KIND=JPRB) :: RTHOMO +REAL(KIND=JPRB) :: RCOVPMIN +REAL(KIND=JPRB) :: RCCN +REAL(KIND=JPRB) :: RNICE +REAL(KIND=JPRB) :: RCCNOM +REAL(KIND=JPRB) :: RCCNSS +REAL(KIND=JPRB) :: RCCNSU +REAL(KIND=JPRB) :: RCLDTOPCF +REAL(KIND=JPRB) :: RDEPLIQREFRATE +REAL(KIND=JPRB) :: RDEPLIQREFDEPTH +!-------------------------------------------------------- +! Autoconversion/accretion (Khairoutdinov and Kogan 2000) +!-------------------------------------------------------- +REAL(KIND=JPRB) :: RCL_KKAac +REAL(KIND=JPRB) :: RCL_KKBac +REAL(KIND=JPRB) :: RCL_KKAau +REAL(KIND=JPRB) :: RCL_KKBauq +REAL(KIND=JPRB) :: RCL_KKBaun +REAL(KIND=JPRB) :: RCL_KK_cloud_num_sea +REAL(KIND=JPRB) :: RCL_KK_cloud_num_land +!-------------------------------------------------------- +! Ice +!-------------------------------------------------------- +REAL(KIND=JPRB) :: RCL_AI +REAL(KIND=JPRB) :: RCL_BI +REAL(KIND=JPRB) :: RCL_CI +REAL(KIND=JPRB) :: RCL_DI +REAL(KIND=JPRB) :: RCL_X1I +REAL(KIND=JPRB) :: RCL_X2I +REAL(KIND=JPRB) :: RCL_X3I +REAL(KIND=JPRB) :: RCL_X4I +REAL(KIND=JPRB) :: RCL_CONST1I +REAL(KIND=JPRB) :: RCL_CONST2I +REAL(KIND=JPRB) :: RCL_CONST3I +REAL(KIND=JPRB) :: RCL_CONST4I +REAL(KIND=JPRB) :: RCL_CONST5I +REAL(KIND=JPRB) :: RCL_CONST6I +REAL(KIND=JPRB) :: RCL_APB1 +REAL(KIND=JPRB) :: RCL_APB2 +REAL(KIND=JPRB) :: RCL_APB3 +!-------------------------------------------------------- +! Snow +!-------------------------------------------------------- +REAL(KIND=JPRB) :: RCL_AS +REAL(KIND=JPRB) :: RCL_BS +REAL(KIND=JPRB) :: RCL_CS +REAL(KIND=JPRB) :: RCL_DS +REAL(KIND=JPRB) :: RCL_X1S +REAL(KIND=JPRB) :: RCL_X2S +REAL(KIND=JPRB) :: RCL_X3S +REAL(KIND=JPRB) :: RCL_X4S +REAL(KIND=JPRB) :: RCL_CONST1S +REAL(KIND=JPRB) :: RCL_CONST2S +REAL(KIND=JPRB) :: RCL_CONST3S +REAL(KIND=JPRB) :: RCL_CONST4S +REAL(KIND=JPRB) :: RCL_CONST5S +REAL(KIND=JPRB) :: RCL_CONST6S +REAL(KIND=JPRB) :: RCL_CONST7S +REAL(KIND=JPRB) :: RCL_CONST8S +!-------------------------------------------------------- +! Rain +!-------------------------------------------------------- +REAL(KIND=JPRB) :: RDENSWAT +REAL(KIND=JPRB) :: RDENSREF +REAL(KIND=JPRB) :: RCL_AR +REAL(KIND=JPRB) :: RCL_BR +REAL(KIND=JPRB) :: RCL_CR +REAL(KIND=JPRB) :: RCL_DR +REAL(KIND=JPRB) :: RCL_X1R +REAL(KIND=JPRB) :: RCL_X2R +REAL(KIND=JPRB) :: RCL_X4R +REAL(KIND=JPRB) :: RCL_KA273 +REAL(KIND=JPRB) :: RCL_CDENOM1 +REAL(KIND=JPRB) :: RCL_CDENOM2 +REAL(KIND=JPRB) :: RCL_CDENOM3 +REAL(KIND=JPRB) :: RCL_SCHMIDT +REAL(KIND=JPRB) :: RCL_DYNVISC +REAL(KIND=JPRB) :: RCL_CONST1R +REAL(KIND=JPRB) :: RCL_CONST2R +REAL(KIND=JPRB) :: RCL_CONST3R +REAL(KIND=JPRB) :: RCL_CONST4R +REAL(KIND=JPRB) :: RCL_FAC1 +REAL(KIND=JPRB) :: RCL_FAC2 +! Rain freezing +REAL(KIND=JPRB) :: RCL_CONST5R +REAL(KIND=JPRB) :: RCL_CONST6R +REAL(KIND=JPRB) :: RCL_FZRAB +REAL(KIND=JPRB) :: RCL_FZRBB + +LOGICAL :: LCLDEXTRA, LCLDBUDGET + +INTEGER(KIND=JPIM) :: NSSOPT +INTEGER(KIND=JPIM) :: NCLDTOP +INTEGER(KIND=JPIM) :: NAECLBC, NAECLDU, NAECLOM, NAECLSS, NAECLSU +INTEGER(KIND=JPIM) :: NCLDDIAG + +! aerosols +INTEGER(KIND=JPIM) :: NAERCLD +LOGICAL :: LAERLIQAUTOLSP +LOGICAL :: LAERLIQAUTOCP +LOGICAL :: LAERLIQAUTOCPB +LOGICAL :: LAERLIQCOLL +LOGICAL :: LAERICESED +LOGICAL :: LAERICEAUTO + +! variance arrays +REAL(KIND=JPRB) :: NSHAPEP +REAL(KIND=JPRB) :: NSHAPEQ +INTEGER(KIND=JPIM) :: NBETA +REAL(KIND=JPRB) :: RBETA(0:100) +REAL(KIND=JPRB) :: RBETAP1(0:100) + + +END TYPE TECLDP + +TYPE(TECLDP), ALLOCATABLE :: YRECLDP + +CONTAINS + + SUBROUTINE YRECLDP_LOAD_PARAMETERS() + IF(.NOT.ALLOCATED(YRECLDP)) ALLOCATE(YRECLDP) + CALL LOAD_SCALAR('YRECLDP_RAMID', YRECLDP%RAMID) + CALL LOAD_SCALAR('YRECLDP_RCLDIFF', YRECLDP%RCLDIFF) + CALL LOAD_SCALAR('YRECLDP_RCLDIFF_CONVI', YRECLDP%RCLDIFF_CONVI) + CALL LOAD_SCALAR('YRECLDP_RCLCRIT', YRECLDP%RCLCRIT) + CALL LOAD_SCALAR('YRECLDP_RCLCRIT_SEA', YRECLDP%RCLCRIT_SEA) + CALL LOAD_SCALAR('YRECLDP_RCLCRIT_LAND', YRECLDP%RCLCRIT_LAND) + CALL LOAD_SCALAR('YRECLDP_RKCONV', YRECLDP%RKCONV) + CALL LOAD_SCALAR('YRECLDP_RPRC1', YRECLDP%RPRC1) + CALL LOAD_SCALAR('YRECLDP_RPRC2', YRECLDP%RPRC2) + CALL LOAD_SCALAR('YRECLDP_RCLDMAX', YRECLDP%RCLDMAX) + CALL LOAD_SCALAR('YRECLDP_RPECONS', YRECLDP%RPECONS) + CALL LOAD_SCALAR('YRECLDP_RVRFACTOR', YRECLDP%RVRFACTOR) + CALL LOAD_SCALAR('YRECLDP_RPRECRHMAX', YRECLDP%RPRECRHMAX) + CALL LOAD_SCALAR('YRECLDP_RTAUMEL', YRECLDP%RTAUMEL) + CALL LOAD_SCALAR('YRECLDP_RAMIN', YRECLDP%RAMIN) + CALL LOAD_SCALAR('YRECLDP_RLMIN', YRECLDP%RLMIN) + CALL LOAD_SCALAR('YRECLDP_RKOOPTAU', YRECLDP%RKOOPTAU) + + CALL LOAD_SCALAR('YRECLDP_RCLDTOPP', YRECLDP%RCLDTOPP) + CALL LOAD_SCALAR('YRECLDP_RLCRITSNOW', YRECLDP%RLCRITSNOW) + CALL LOAD_SCALAR('YRECLDP_RSNOWLIN1', YRECLDP%RSNOWLIN1) + CALL LOAD_SCALAR('YRECLDP_RSNOWLIN2', YRECLDP%RSNOWLIN2) + CALL LOAD_SCALAR('YRECLDP_RICEHI1', YRECLDP%RICEHI1) + CALL LOAD_SCALAR('YRECLDP_RICEHI2', YRECLDP%RICEHI2) + CALL LOAD_SCALAR('YRECLDP_RICEINIT', YRECLDP%RICEINIT) + CALL LOAD_SCALAR('YRECLDP_RVICE', YRECLDP%RVICE) + CALL LOAD_SCALAR('YRECLDP_RVRAIN', YRECLDP%RVRAIN) + CALL LOAD_SCALAR('YRECLDP_RVSNOW', YRECLDP%RVSNOW) + CALL LOAD_SCALAR('YRECLDP_RTHOMO', YRECLDP%RTHOMO) + CALL LOAD_SCALAR('YRECLDP_RCOVPMIN', YRECLDP%RCOVPMIN) + CALL LOAD_SCALAR('YRECLDP_RCCN', YRECLDP%RCCN) + CALL LOAD_SCALAR('YRECLDP_RNICE', YRECLDP%RNICE) + CALL LOAD_SCALAR('YRECLDP_RCCNOM', YRECLDP%RCCNOM) + CALL LOAD_SCALAR('YRECLDP_RCCNSS', YRECLDP%RCCNSS) + CALL LOAD_SCALAR('YRECLDP_RCCNSU', YRECLDP%RCCNSU) + CALL LOAD_SCALAR('YRECLDP_RCLDTOPCF', YRECLDP%RCLDTOPCF) + CALL LOAD_SCALAR('YRECLDP_RDEPLIQREFRATE', YRECLDP%RDEPLIQREFRATE) + CALL LOAD_SCALAR('YRECLDP_RDEPLIQREFDEPTH', YRECLDP%RDEPLIQREFDEPTH) + CALL LOAD_SCALAR('YRECLDP_RCL_KKAac', YRECLDP%RCL_KKAac) + CALL LOAD_SCALAR('YRECLDP_RCL_KKBac', YRECLDP%RCL_KKBac) + CALL LOAD_SCALAR('YRECLDP_RCL_KKAau', YRECLDP%RCL_KKAau) + CALL LOAD_SCALAR('YRECLDP_RCL_KKBauq', YRECLDP%RCL_KKBauq) + CALL LOAD_SCALAR('YRECLDP_RCL_KKBaun', YRECLDP%RCL_KKBaun) + CALL LOAD_SCALAR('YRECLDP_RCL_KK_cloud_num_sea', YRECLDP%RCL_KK_cloud_num_sea) + CALL LOAD_SCALAR('YRECLDP_RCL_KK_cloud_num_land', YRECLDP%RCL_KK_cloud_num_land) + CALL LOAD_SCALAR('YRECLDP_RCL_AI', YRECLDP%RCL_AI) + CALL LOAD_SCALAR('YRECLDP_RCL_BI', YRECLDP%RCL_BI) + CALL LOAD_SCALAR('YRECLDP_RCL_CI', YRECLDP%RCL_CI) + CALL LOAD_SCALAR('YRECLDP_RCL_DI', YRECLDP%RCL_DI) + CALL LOAD_SCALAR('YRECLDP_RCL_X1I', YRECLDP%RCL_X1I) + CALL LOAD_SCALAR('YRECLDP_RCL_X2I', YRECLDP%RCL_X2I) + CALL LOAD_SCALAR('YRECLDP_RCL_X3I', YRECLDP%RCL_X3I) + CALL LOAD_SCALAR('YRECLDP_RCL_X4I', YRECLDP%RCL_X4I) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST1I', YRECLDP%RCL_CONST1I) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST2I', YRECLDP%RCL_CONST2I) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST3I', YRECLDP%RCL_CONST3I) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST4I', YRECLDP%RCL_CONST4I) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST5I', YRECLDP%RCL_CONST5I) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST6I', YRECLDP%RCL_CONST6I) + CALL LOAD_SCALAR('YRECLDP_RCL_APB1', YRECLDP%RCL_APB1) + CALL LOAD_SCALAR('YRECLDP_RCL_APB2', YRECLDP%RCL_APB2) + CALL LOAD_SCALAR('YRECLDP_RCL_APB3', YRECLDP%RCL_APB3) + CALL LOAD_SCALAR('YRECLDP_RCL_AS', YRECLDP%RCL_AS) + CALL LOAD_SCALAR('YRECLDP_RCL_BS', YRECLDP%RCL_BS) + CALL LOAD_SCALAR('YRECLDP_RCL_CS', YRECLDP%RCL_CS) + CALL LOAD_SCALAR('YRECLDP_RCL_DS', YRECLDP%RCL_DS) + CALL LOAD_SCALAR('YRECLDP_RCL_X1S', YRECLDP%RCL_X1S) + CALL LOAD_SCALAR('YRECLDP_RCL_X2S', YRECLDP%RCL_X2S) + CALL LOAD_SCALAR('YRECLDP_RCL_X3S', YRECLDP%RCL_X3S) + CALL LOAD_SCALAR('YRECLDP_RCL_X4S', YRECLDP%RCL_X4S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST1S', YRECLDP%RCL_CONST1S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST2S', YRECLDP%RCL_CONST2S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST3S', YRECLDP%RCL_CONST3S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST4S', YRECLDP%RCL_CONST4S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST5S', YRECLDP%RCL_CONST5S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST6S', YRECLDP%RCL_CONST6S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST7S', YRECLDP%RCL_CONST7S) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST8S', YRECLDP%RCL_CONST8S) + CALL LOAD_SCALAR('YRECLDP_RDENSWAT', YRECLDP%RDENSWAT) + CALL LOAD_SCALAR('YRECLDP_RDENSREF', YRECLDP%RDENSREF) + CALL LOAD_SCALAR('YRECLDP_RCL_AR', YRECLDP%RCL_AR) + CALL LOAD_SCALAR('YRECLDP_RCL_BR', YRECLDP%RCL_BR) + CALL LOAD_SCALAR('YRECLDP_RCL_CR', YRECLDP%RCL_CR) + CALL LOAD_SCALAR('YRECLDP_RCL_DR', YRECLDP%RCL_DR) + CALL LOAD_SCALAR('YRECLDP_RCL_X1R', YRECLDP%RCL_X1R) + CALL LOAD_SCALAR('YRECLDP_RCL_X2R', YRECLDP%RCL_X2R) + CALL LOAD_SCALAR('YRECLDP_RCL_X4R', YRECLDP%RCL_X4R) + CALL LOAD_SCALAR('YRECLDP_RCL_KA273', YRECLDP%RCL_KA273) + CALL LOAD_SCALAR('YRECLDP_RCL_CDENOM1', YRECLDP%RCL_CDENOM1) + CALL LOAD_SCALAR('YRECLDP_RCL_CDENOM2', YRECLDP%RCL_CDENOM2) + CALL LOAD_SCALAR('YRECLDP_RCL_CDENOM3', YRECLDP%RCL_CDENOM3) + CALL LOAD_SCALAR('YRECLDP_RCL_SCHMIDT', YRECLDP%RCL_SCHMIDT) + CALL LOAD_SCALAR('YRECLDP_RCL_DYNVISC', YRECLDP%RCL_DYNVISC) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST1R', YRECLDP%RCL_CONST1R) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST2R', YRECLDP%RCL_CONST2R) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST3R', YRECLDP%RCL_CONST3R) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST4R', YRECLDP%RCL_CONST4R) + CALL LOAD_SCALAR('YRECLDP_RCL_FAC1', YRECLDP%RCL_FAC1) + CALL LOAD_SCALAR('YRECLDP_RCL_FAC2', YRECLDP%RCL_FAC2) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST5R', YRECLDP%RCL_CONST5R) + CALL LOAD_SCALAR('YRECLDP_RCL_CONST6R', YRECLDP%RCL_CONST6R) + CALL LOAD_SCALAR('YRECLDP_RCL_FZRAB', YRECLDP%RCL_FZRAB) + CALL LOAD_SCALAR('YRECLDP_RCL_FZRBB', YRECLDP%RCL_FZRBB) + CALL LOAD_SCALAR('YRECLDP_LCLDEXTRA', YRECLDP%LCLDEXTRA) + CALL LOAD_SCALAR('YRECLDP_LCLDBUDGET', YRECLDP%LCLDBUDGET) + CALL LOAD_SCALAR('YRECLDP_NSSOPT', YRECLDP%NSSOPT) + CALL LOAD_SCALAR('YRECLDP_NCLDTOP', YRECLDP%NCLDTOP) + CALL LOAD_SCALAR('YRECLDP_NAECLBC', YRECLDP%NAECLBC) + CALL LOAD_SCALAR('YRECLDP_NAECLDU', YRECLDP%NAECLDU) + CALL LOAD_SCALAR('YRECLDP_NAECLOM', YRECLDP%NAECLOM) + CALL LOAD_SCALAR('YRECLDP_NAECLSS', YRECLDP%NAECLSS) + CALL LOAD_SCALAR('YRECLDP_NAECLSU', YRECLDP%NAECLSU) + CALL LOAD_SCALAR('YRECLDP_NCLDDIAG', YRECLDP%NCLDDIAG) + CALL LOAD_SCALAR('YRECLDP_NAERCLD', YRECLDP%NAERCLD) + CALL LOAD_SCALAR('YRECLDP_LAERLIQAUTOLSP', YRECLDP%LAERLIQAUTOLSP) + CALL LOAD_SCALAR('YRECLDP_LAERLIQAUTOCP', YRECLDP%LAERLIQAUTOCP) + CALL LOAD_SCALAR('YRECLDP_LAERLIQAUTOCPB', YRECLDP%LAERLIQAUTOCPB) + CALL LOAD_SCALAR('YRECLDP_LAERLIQCOLL', YRECLDP%LAERLIQCOLL) + CALL LOAD_SCALAR('YRECLDP_LAERICESED', YRECLDP%LAERICESED) + CALL LOAD_SCALAR('YRECLDP_LAERICEAUTO', YRECLDP%LAERICEAUTO) + CALL LOAD_SCALAR('YRECLDP_NSHAPEP', YRECLDP%NSHAPEP) + CALL LOAD_SCALAR('YRECLDP_NSHAPEQ', YRECLDP%NSHAPEQ) + CALL LOAD_SCALAR('YRECLDP_NBETA', YRECLDP%NBETA) + ! The last two are actually arrays, so treat them as fields + CALL LOAD_ARRAY('YRECLDP_RBETA', 1, 101, 101, 101, YRECLDP%RBETA(0:100)) + CALL LOAD_ARRAY('YRECLDP_RBETAP1', 1, 101, 101, 101, YRECLDP%RBETAP1(0:100)) + END SUBROUTINE YRECLDP_LOAD_PARAMETERS + +END MODULE YOECLDP diff --git a/src/cloudsc_python/src/cloudscf2py/yoethf.F90 b/src/cloudsc_python/src/cloudscf2py/yoethf.F90 new file mode 100644 index 00000000..7f885b44 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/yoethf.F90 @@ -0,0 +1,160 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE YOETHF + +USE PARKIND1, ONLY : JPIM, JPRB +USE FILE_IO_MOD, ONLY : LOAD_SCALAR + +IMPLICIT NONE + +SAVE + +! ------------------------------------------------------------------ +!* *YOETHF* DERIVED CONSTANTS SPECIFIC TO ECMWF THERMODYNAMICS +! ------------------------------------------------------------------ + +REAL(KIND=JPRB) :: R2ES +REAL(KIND=JPRB) :: R3LES +REAL(KIND=JPRB) :: R3IES +REAL(KIND=JPRB) :: R4LES +REAL(KIND=JPRB) :: R4IES +REAL(KIND=JPRB) :: R5LES +REAL(KIND=JPRB) :: R5IES +REAL(KIND=JPRB) :: RVTMP2 +REAL(KIND=JPRB) :: RHOH2O +REAL(KIND=JPRB) :: R5ALVCP +REAL(KIND=JPRB) :: R5ALSCP +REAL(KIND=JPRB) :: RALVDCP +REAL(KIND=JPRB) :: RALSDCP +REAL(KIND=JPRB) :: RALFDCP +REAL(KIND=JPRB) :: RTWAT +REAL(KIND=JPRB) :: RTBER +REAL(KIND=JPRB) :: RTBERCU +REAL(KIND=JPRB) :: RTICE +REAL(KIND=JPRB) :: RTICECU +REAL(KIND=JPRB) :: RTWAT_RTICE_R +REAL(KIND=JPRB) :: RTWAT_RTICECU_R +REAL(KIND=JPRB) :: RKOOP1 +REAL(KIND=JPRB) :: RKOOP2 + +TYPE :: TOETHF +REAL(KIND=JPRB) :: R2ES +REAL(KIND=JPRB) :: R3LES +REAL(KIND=JPRB) :: R3IES +REAL(KIND=JPRB) :: R4LES +REAL(KIND=JPRB) :: R4IES +REAL(KIND=JPRB) :: R5LES +REAL(KIND=JPRB) :: R5IES +REAL(KIND=JPRB) :: RVTMP2 +REAL(KIND=JPRB) :: RHOH2O +REAL(KIND=JPRB) :: R5ALVCP +REAL(KIND=JPRB) :: R5ALSCP +REAL(KIND=JPRB) :: RALVDCP +REAL(KIND=JPRB) :: RALSDCP +REAL(KIND=JPRB) :: RALFDCP +REAL(KIND=JPRB) :: RTWAT +REAL(KIND=JPRB) :: RTBER +REAL(KIND=JPRB) :: RTBERCU +REAL(KIND=JPRB) :: RTICE +REAL(KIND=JPRB) :: RTICECU +REAL(KIND=JPRB) :: RTWAT_RTICE_R +REAL(KIND=JPRB) :: RTWAT_RTICECU_R +REAL(KIND=JPRB) :: RKOOP1 +REAL(KIND=JPRB) :: RKOOP2 +END TYPE TOETHF + +TYPE(TOETHF), ALLOCATABLE :: YRTHF + +! J.-J. MORCRETTE 91/07/14 ADAPTED TO I.F.S. + +! NAME TYPE PURPOSE +! ---- ---- ------- + +! *R__ES* REAL *CONSTANTS USED FOR COMPUTATION OF SATURATION +! MIXING RATIO OVER LIQUID WATER(*R_LES*) OR +! ICE(*R_IES*). +! *RVTMP2* REAL *RVTMP2=RCPV/RCPD-1. +! *RHOH2O* REAL *DENSITY OF LIQUID WATER. (RATM/100.) +! *R5ALVCP* REAL *R5LES*RLVTT/RCPD +! *R5ALSCP* REAL *R5IES*RLSTT/RCPD +! *RALVDCP* REAL *RLVTT/RCPD +! *RALSDCP* REAL *RLSTT/RCPD +! *RALFDCP* REAL *RLMLT/RCPD +! *RTWAT* REAL *RTWAT=RTT +! *RTBER* REAL *RTBER=RTT-0.05 +! *RTBERCU REAL *RTBERCU=RTT-5.0 +! *RTICE* REAL *RTICE=RTT-0.1 +! *RTICECU* REAL *RTICECU=RTT-23.0 +! *RKOOP? REAL *CONSTANTS TO DESCRIBE KOOP FORM FOR NUCLEATION +! *RTWAT_RTICE_R* REAL *RTWAT_RTICE_R=1./(RTWAT-RTICE) +! *RTWAT_RTICECU_R* REAL *RTWAT_RTICECU_R=1./(RTWAT-RTICECU) + +!$acc declare copyin(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies, & +!$acc r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, & +!$acc rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2) + +!$omp declare target(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies) +!$omp declare target( r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu) +!$omp declare target( rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2) + +! ---------------------------------------------------------------- + +CONTAINS + + SUBROUTINE YOETHF_LOAD_PARAMETERS() + CALL LOAD_SCALAR('R2ES', R2ES) + CALL LOAD_SCALAR('R3LES', R3LES) + CALL LOAD_SCALAR('R3IES', R3IES) + CALL LOAD_SCALAR('R4LES', R4LES) + CALL LOAD_SCALAR('R4IES', R4IES) + CALL LOAD_SCALAR('R5LES', R5LES) + CALL LOAD_SCALAR('R5IES', R5IES) + CALL LOAD_SCALAR('R5ALVCP', R5ALVCP) + CALL LOAD_SCALAR('R5ALSCP', R5ALSCP) + CALL LOAD_SCALAR('RALVDCP', RALVDCP) + CALL LOAD_SCALAR('RALSDCP', RALSDCP) + CALL LOAD_SCALAR('RALFDCP', RALFDCP) + CALL LOAD_SCALAR('RTWAT', RTWAT) + CALL LOAD_SCALAR('RTICE', RTICE) + CALL LOAD_SCALAR('RTICECU', RTICECU) + CALL LOAD_SCALAR('RTWAT_RTICE_R', RTWAT_RTICE_R) + CALL LOAD_SCALAR('RTWAT_RTICECU_R', RTWAT_RTICECU_R) + CALL LOAD_SCALAR('RKOOP1', RKOOP1) + CALL LOAD_SCALAR('RKOOP2', RKOOP2) + CALL YRTHF_COPY_PARAMETERS() +!$acc update device(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies, & +!$acc r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, & +!$acc rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2) + END SUBROUTINE YOETHF_LOAD_PARAMETERS + + SUBROUTINE YRTHF_COPY_PARAMETERS() + IF(.NOT.ALLOCATED(YRTHF)) ALLOCATE(YRTHF) + YRTHF%R2ES = R2ES + YRTHF%R3LES = R3LES + YRTHF%R3IES = R3IES + YRTHF%R4LES = R4LES + YRTHF%R4IES = R4IES + YRTHF%R5LES = R5LES + YRTHF%R5IES = R5IES + YRTHF%R5ALVCP = R5ALVCP + YRTHF%R5ALSCP = R5ALSCP + YRTHF%RALVDCP = RALVDCP + YRTHF%RALSDCP = RALSDCP + YRTHF%RALFDCP = RALFDCP + YRTHF%RTWAT = RTWAT + YRTHF%RTICE = RTICE + YRTHF%RTICECU = RTICECU + YRTHF%RTWAT_RTICE_R = RTWAT_RTICE_R + YRTHF%RTWAT_RTICECU_R = RTWAT_RTICECU_R + YRTHF%RKOOP1 = RKOOP1 + YRTHF%RKOOP2 = RKOOP2 + END SUBROUTINE YRTHF_COPY_PARAMETERS + +END MODULE YOETHF diff --git a/src/cloudsc_python/src/cloudscf2py/yomcst.F90 b/src/cloudsc_python/src/cloudscf2py/yomcst.F90 new file mode 100644 index 00000000..7b5a3bd2 --- /dev/null +++ b/src/cloudsc_python/src/cloudscf2py/yomcst.F90 @@ -0,0 +1,338 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE YOMCST + +USE PARKIND1, ONLY : JPRB +USE FILE_IO_MOD, ONLY : LOAD_SCALAR + +IMPLICIT NONE + +SAVE + +! ------------------------------------------------------------------ + +!* Common of physical constants +! You will find the meanings in the annex 1 of the documentation + +! A1.0 Fundamental constants +! * RPI : number Pi +! * RCLUM : light velocity +! * RHPLA : Planck constant +! * RKBOL : Bolzmann constant +! * RNAVO : Avogadro number +REAL(KIND=JPRB) :: RPI +REAL(KIND=JPRB) :: RCLUM +REAL(KIND=JPRB) :: RHPLA +REAL(KIND=JPRB) :: RKBOL +REAL(KIND=JPRB) :: RNAVO + +! A1.1 Astronomical constants +! * RDAY : duration of the solar day +! * RDAYI : invariant time unit of 86400s +! * RHOUR : duration of the solar hour +! * REA : astronomical unit (mean distance Earth-sun) +! * REPSM : polar axis tilting angle +! * RSIYEA : duration of the sideral year +! * RSIDAY : duration of the sideral day +! * ROMEGA : angular velocity of the Earth rotation +REAL(KIND=JPRB) :: RDAY +REAL(KIND=JPRB) :: RDAYI +REAL(KIND=JPRB) :: RHOUR +REAL(KIND=JPRB) :: REA +REAL(KIND=JPRB) :: REPSM +REAL(KIND=JPRB) :: RSIYEA +REAL(KIND=JPRB) :: RSIDAY +REAL(KIND=JPRB) :: ROMEGA + +! A1.2 Geoide +! * RA : Earth radius +! * RG : gravity constant +! * R1SA : 1/RA +REAL(KIND=JPRB) :: RA +REAL(KIND=JPRB) :: RG +REAL(KIND=JPRB) :: R1SA + +! A1.3 Radiation +! * RSIGMA : Stefan-Bolzman constant +! * RI0 : solar constant +REAL(KIND=JPRB) :: RSIGMA +REAL(KIND=JPRB) :: RI0 + +! A1.4 Thermodynamic gas phase +! * R : perfect gas constant +! * RMD : dry air molar mass +! * RMV : vapour water molar mass +! * RMO3 : ozone molar mass +! * RD : R_dry (dry air constant) +! * RV : R_vap (vapour water constant) +! * RCPD : Cp_dry (dry air calorific capacity at constant pressure) +! * RCPV : Cp_vap (vapour calorific capacity at constant pressure) +! * RCVD : Cv_dry (dry air calorific capacity at constant volume) +! * RCVV : Cv_vap (vapour calorific capacity at constant volume) +! * RKAPPA : Kappa = R_dry/Cp_dry +! * RETV : R_vap/R_dry - 1 +! * RMCO2 : CO2 (carbon dioxyde) molar mass +! * RMCH4 : CH4 (methane) molar mass +! * RMN2O : N2O molar mass +! * RMCO : CO (carbon monoxyde) molar mass +! * RMHCHO : HCHO molar mass +! * RMNO2 : NO2 (nitrogen dioxyde) molar mass +! * RMSO2 : SO2 (sulfur dioxyde) molar mass +! * RMSO4 : SO4 (sulphate) molar mass +REAL(KIND=JPRB) :: R +REAL(KIND=JPRB) :: RMD +REAL(KIND=JPRB) :: RMV +REAL(KIND=JPRB) :: RMO3 +REAL(KIND=JPRB) :: RD +REAL(KIND=JPRB) :: RV +REAL(KIND=JPRB) :: RCPD +REAL(KIND=JPRB) :: RCPV +REAL(KIND=JPRB) :: RCVD +REAL(KIND=JPRB) :: RCVV +REAL(KIND=JPRB) :: RKAPPA +REAL(KIND=JPRB) :: RETV +REAL(KIND=JPRB) :: RMCO2 +REAL(KIND=JPRB) :: RMCH4 +REAL(KIND=JPRB) :: RMN2O +REAL(KIND=JPRB) :: RMCO +REAL(KIND=JPRB) :: RMHCHO +REAL(KIND=JPRB) :: RMNO2 +REAL(KIND=JPRB) :: RMSO2 +REAL(KIND=JPRB) :: RMSO4 + +! A1.5,6 Thermodynamic liquid,solid phases +! * RCW : Cw (calorific capacity of liquid water) +! * RCS : Cs (calorific capacity of solid water) +REAL(KIND=JPRB) :: RCW +REAL(KIND=JPRB) :: RCS + +! A1.7 Thermodynamic transition of phase +! * RATM : pre_n = "normal" pressure +! * RTT : Tt = temperature of water fusion at "pre_n" +! * RLVTT : RLvTt = vaporisation latent heat at T=Tt +! * RLSTT : RLsTt = sublimation latent heat at T=Tt +! * RLVZER : RLv0 = vaporisation latent heat at T=0K +! * RLSZER : RLs0 = sublimation latent heat at T=0K +! * RLMLT : RLMlt = melting latent heat at T=Tt +! * RDT : Tt - Tx(ew-ei) +REAL(KIND=JPRB) :: RATM +REAL(KIND=JPRB) :: RTT +REAL(KIND=JPRB) :: RLVTT +REAL(KIND=JPRB) :: RLSTT +REAL(KIND=JPRB) :: RLVZER +REAL(KIND=JPRB) :: RLSZER +REAL(KIND=JPRB) :: RLMLT +REAL(KIND=JPRB) :: RDT + +! A1.8 Curve of saturation +! * RESTT : es(Tt) = saturation vapour tension at T=Tt +! * RGAMW : Rgamw = (Cw-Cp_vap)/R_vap +! * RBETW : Rbetw = RLvTt/R_vap + Rgamw*Tt +! * RALPW : Ralpw = log(es(Tt)) + Rbetw/Tt + Rgamw*log(Tt) +! * RGAMS : Rgams = (Cs-Cp_vap)/R_vap +! * RBETS : Rbets = RLsTt/R_vap + Rgams*Tt +! * RALPS : Ralps = log(es(Tt)) + Rbets/Tt + Rgams*log(Tt) +! * RALPD : Ralpd = Ralps - Ralpw +! * RBETD : Rbetd = Rbets - Rbetw +! * RGAMD : Rgamd = Rgams - Rgamw +REAL(KIND=JPRB) :: RESTT +REAL(KIND=JPRB) :: RGAMW +REAL(KIND=JPRB) :: RBETW +REAL(KIND=JPRB) :: RALPW +REAL(KIND=JPRB) :: RGAMS +REAL(KIND=JPRB) :: RBETS +REAL(KIND=JPRB) :: RALPS +REAL(KIND=JPRB) :: RALPD +REAL(KIND=JPRB) :: RBETD +REAL(KIND=JPRB) :: RGAMD + +! NaN value +! CHARACTER(LEN=8), PARAMETER :: CSNAN = & +! & CHAR(0)//CHAR(0)//CHAR(0)//CHAR(0)//CHAR(0)//CHAR(0)//CHAR(244)//CHAR(127) +REAL(KIND=JPRB) :: RSNAN + +!$acc declare copyin(rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, rv) +!$omp declare target(rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, rv) + + +! ------------------------------------------------------------------ + +TYPE :: TOMCST +! A1.0 Fundamental constants +! * RPI : number Pi +! * RCLUM : light velocity +! * RHPLA : Planck constant +! * RKBOL : Bolzmann constant +! * RNAVO : Avogadro number +REAL(KIND=JPRB) :: RPI +REAL(KIND=JPRB) :: RCLUM +REAL(KIND=JPRB) :: RHPLA +REAL(KIND=JPRB) :: RKBOL +REAL(KIND=JPRB) :: RNAVO + +! A1.1 Astronomical constants +! * RDAY : duration of the solar day +! * RDAYI : invariant time unit of 86400s +! * RHOUR : duration of the solar hour +! * REA : astronomical unit (mean distance Earth-sun) +! * REPSM : polar axis tilting angle +! * RSIYEA : duration of the sideral year +! * RSIDAY : duration of the sideral day +! * ROMEGA : angular velocity of the Earth rotation +REAL(KIND=JPRB) :: RDAY +REAL(KIND=JPRB) :: RDAYI +REAL(KIND=JPRB) :: RHOUR +REAL(KIND=JPRB) :: REA +REAL(KIND=JPRB) :: REPSM +REAL(KIND=JPRB) :: RSIYEA +REAL(KIND=JPRB) :: RSIDAY +REAL(KIND=JPRB) :: ROMEGA + +! A1.2 Geoide +! * RA : Earth radius +! * RG : gravity constant +! * R1SA : 1/RA +REAL(KIND=JPRB) :: RA +REAL(KIND=JPRB) :: RG +REAL(KIND=JPRB) :: R1SA + +! A1.3 Radiation +! * RSIGMA : Stefan-Bolzman constant +! * RI0 : solar constant +REAL(KIND=JPRB) :: RSIGMA +REAL(KIND=JPRB) :: RI0 + +! A1.4 Thermodynamic gas phase +! * R : perfect gas constant +! * RMD : dry air molar mass +! * RMV : vapour water molar mass +! * RMO3 : ozone molar mass +! * RD : R_dry (dry air constant) +! * RV : R_vap (vapour water constant) +! * RCPD : Cp_dry (dry air calorific capacity at constant pressure) +! * RCPV : Cp_vap (vapour calorific capacity at constant pressure) +! * RCVD : Cv_dry (dry air calorific capacity at constant volume) +! * RCVV : Cv_vap (vapour calorific capacity at constant volume) +! * RKAPPA : Kappa = R_dry/Cp_dry +! * RETV : R_vap/R_dry - 1 +! * RMCO2 : CO2 (carbon dioxyde) molar mass +! * RMCH4 : CH4 (methane) molar mass +! * RMN2O : N2O molar mass +! * RMCO : CO (carbon monoxyde) molar mass +! * RMHCHO : HCHO molar mass +! * RMNO2 : NO2 (nitrogen dioxyde) molar mass +! * RMSO2 : SO2 (sulfur dioxyde) molar mass +! * RMSO4 : SO4 (sulphate) molar mass +REAL(KIND=JPRB) :: R +REAL(KIND=JPRB) :: RMD +REAL(KIND=JPRB) :: RMV +REAL(KIND=JPRB) :: RMO3 +REAL(KIND=JPRB) :: RD +REAL(KIND=JPRB) :: RV +REAL(KIND=JPRB) :: RCPD +REAL(KIND=JPRB) :: RCPV +REAL(KIND=JPRB) :: RCVD +REAL(KIND=JPRB) :: RCVV +REAL(KIND=JPRB) :: RKAPPA +REAL(KIND=JPRB) :: RETV +REAL(KIND=JPRB) :: RMCO2 +REAL(KIND=JPRB) :: RMCH4 +REAL(KIND=JPRB) :: RMN2O +REAL(KIND=JPRB) :: RMCO +REAL(KIND=JPRB) :: RMHCHO +REAL(KIND=JPRB) :: RMNO2 +REAL(KIND=JPRB) :: RMSO2 +REAL(KIND=JPRB) :: RMSO4 + +! A1.5,6 Thermodynamic liquid,solid phases +! * RCW : Cw (calorific capacity of liquid water) +! * RCS : Cs (calorific capacity of solid water) +REAL(KIND=JPRB) :: RCW +REAL(KIND=JPRB) :: RCS + +! A1.7 Thermodynamic transition of phase +! * RATM : pre_n = "normal" pressure +! * RTT : Tt = temperature of water fusion at "pre_n" +! * RLVTT : RLvTt = vaporisation latent heat at T=Tt +! * RLSTT : RLsTt = sublimation latent heat at T=Tt +! * RLVZER : RLv0 = vaporisation latent heat at T=0K +! * RLSZER : RLs0 = sublimation latent heat at T=0K +! * RLMLT : RLMlt = melting latent heat at T=Tt +! * RDT : Tt - Tx(ew-ei) +REAL(KIND=JPRB) :: RATM +REAL(KIND=JPRB) :: RTT +REAL(KIND=JPRB) :: RLVTT +REAL(KIND=JPRB) :: RLSTT +REAL(KIND=JPRB) :: RLVZER +REAL(KIND=JPRB) :: RLSZER +REAL(KIND=JPRB) :: RLMLT +REAL(KIND=JPRB) :: RDT + +! A1.8 Curve of saturation +! * RESTT : es(Tt) = saturation vapour tension at T=Tt +! * RGAMW : Rgamw = (Cw-Cp_vap)/R_vap +! * RBETW : Rbetw = RLvTt/R_vap + Rgamw*Tt +! * RALPW : Ralpw = log(es(Tt)) + Rbetw/Tt + Rgamw*log(Tt) +! * RGAMS : Rgams = (Cs-Cp_vap)/R_vap +! * RBETS : Rbets = RLsTt/R_vap + Rgams*Tt +! * RALPS : Ralps = log(es(Tt)) + Rbets/Tt + Rgams*log(Tt) +! * RALPD : Ralpd = Ralps - Ralpw +! * RBETD : Rbetd = Rbets - Rbetw +! * RGAMD : Rgamd = Rgams - Rgamw +REAL(KIND=JPRB) :: RESTT +REAL(KIND=JPRB) :: RGAMW +REAL(KIND=JPRB) :: RBETW +REAL(KIND=JPRB) :: RALPW +REAL(KIND=JPRB) :: RGAMS +REAL(KIND=JPRB) :: RBETS +REAL(KIND=JPRB) :: RALPS +REAL(KIND=JPRB) :: RALPD +REAL(KIND=JPRB) :: RBETD +REAL(KIND=JPRB) :: RGAMD + +! NaN value +! CHARACTER(LEN=8), PARAMETER :: CSNAN = & +! & CHAR(0)//CHAR(0)//CHAR(0)//CHAR(0)//CHAR(0)//CHAR(0)//CHAR(244)//CHAR(127) +REAL(KIND=JPRB) :: RSNAN + +END TYPE TOMCST + +TYPE(TOMCST), ALLOCATABLE :: YRCST + +CONTAINS + + SUBROUTINE YOMCST_LOAD_PARAMETERS() + CALL LOAD_SCALAR('RG', RG) + CALL LOAD_SCALAR('RD', RD) + CALL LOAD_SCALAR('RCPD', RCPD) + CALL LOAD_SCALAR('RETV', RETV) + CALL LOAD_SCALAR('RLVTT', RLVTT) + CALL LOAD_SCALAR('RLSTT', RLSTT) + CALL LOAD_SCALAR('RLMLT', RLMLT) + CALL LOAD_SCALAR('RTT', RTT) + CALL LOAD_SCALAR('RV', RV) + CALL YRCST_COPY_PARAMETERS() +!$acc update device(rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, rv) + END SUBROUTINE YOMCST_LOAD_PARAMETERS + + SUBROUTINE YRCST_COPY_PARAMETERS() + IF(.NOT.ALLOCATED(YRCST)) ALLOCATE(YRCST) + YRCST%RG = RG + YRCST%RD = RD + YRCST%RCPD = RCPD + YRCST%RETV = RETV + YRCST%RLVTT = RLVTT + YRCST%RLSTT = RLSTT + YRCST%RLMLT = RLMLT + YRCST%RTT = RTT + YRCST%RV = RV + END SUBROUTINE YRCST_COPY_PARAMETERS + +END MODULE YOMCST From 9d460a06a4da97a314250a00b2558d43c09e72d7 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 22 Feb 2023 15:54:26 +0000 Subject: [PATCH 024/174] Python: Add output validation for Loki-generated pure-Python driver --- src/cloudsc_python/drivers/cloudsc_f2py.py | 44 ++++++++++++++++++++ src/cloudsc_python/src/cloudscf2py/inputs.py | 7 +++- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/cloudsc_python/drivers/cloudsc_f2py.py b/src/cloudsc_python/drivers/cloudsc_f2py.py index 2393881d..0ca0996b 100644 --- a/src/cloudsc_python/drivers/cloudsc_f2py.py +++ b/src/cloudsc_python/drivers/cloudsc_f2py.py @@ -10,6 +10,7 @@ import click from pathlib import Path +import numpy as np def loki_generate_kernel(source_path, out_path, include_dir=None): @@ -34,6 +35,47 @@ def loki_generate_kernel(source_path, out_path, include_dir=None): f2py.apply(kernel, role='kernel', path=out_path) +def cloudsc_validate(fields, ref_fields, kidia, kfdia): + _field_names = [ + 'plude', 'pcovptot', 'prainfrac_toprfz', 'pfsqlf', 'pfsqif', + 'pfcqlng', 'pfcqnng', 'pfsqrf', 'pfsqsf', 'pfcqrng', 'pfcqsng', + 'pfsqltur', 'pfsqitur', 'pfplsl', 'pfplsn', 'pfhpsl', 'pfhpsn', + 'tendency_loc_a', 'tendency_loc_q', 'tendency_loc_t', + 'tendency_loc_cld' + ] + ngptot = kfdia - kidia + 1 + + print( + " Variable Dim MinValue MaxValue" + " AbsMaxErr AvgAbsErr/GP MaxRelErr-%" + ) + for name in _field_names: + if len(fields[name].shape) == 1: + f = fields[name][kidia-1:kfdia] + ref = ref_fields[name][kidia-1:kfdia] + elif len(fields[name].shape) == 2: + f = fields[name][:,kidia-1:kfdia] + ref = ref_fields[name][:,kidia-1:kfdia] + elif len(fields[name].shape) == 3: + f = fields[name][:,:,kidia-1:kfdia] + ref = ref_fields[name][:,:,kidia-1:kfdia] + else: + f = fields[name] + ref = ref_fields[name] + zsum = np.sum(np.absolute(ref)) + zerrsum = np.sum(np.absolute(f - ref)) + zeps = np.finfo(np.float64).eps + print( + ' {fname:>20} {fmin:20.13e} {fmax:20.13e} {absmax:20.13e} ' + ' {absavg:20.13e} {maxrel:20.13e}'.format( + fname=name.upper(), fmin=f.min(), fmax=f.max(), + absmax=np.absolute(f - ref).max(), + absavg=np.sum(np.absolute(f - ref)) / ngptot, + maxrel=0.0 if zerrsum < zeps else (zerrsum/(1.0+zsum) if zsum < zeps else zerrsum/zsum) + ) + ) + + def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): from cloudscf2py import ( load_input_fields, load_input_parameters, load_reference_fields, @@ -54,7 +96,9 @@ def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): yrecldp=yrecldp, ydcst=yrmcst, ydthf=yrethf, ) + # Validate the output fields against reference data reference = load_reference_fields(path=reference_path) + cloudsc_validate(cloudsc_args, reference, kidia=1, kfdia=ngptot) @click.command() diff --git a/src/cloudsc_python/src/cloudscf2py/inputs.py b/src/cloudsc_python/src/cloudscf2py/inputs.py index e2986b9e..039e6a2f 100644 --- a/src/cloudsc_python/src/cloudscf2py/inputs.py +++ b/src/cloudsc_python/src/cloudscf2py/inputs.py @@ -149,8 +149,11 @@ def load_reference_fields(path): fields = OrderedDict() argnames = [ - 'PLUDE', 'PCOVPTOT', 'PFPLSL', 'PFPLSN', 'PFHPSL', 'PFHPSN', - 'TENDENCY_LOC_A', 'TENDENCY_LOC_Q', 'TENDENCY_LOC_T', 'TENDENCY_LOC_CLD', + 'PLUDE', 'PCOVPTOT', 'PRAINFRAC_TOPRFZ', 'PFSQLF', 'PFSQIF', + 'PFCQLNG', 'PFCQNNG', 'PFSQRF', 'PFSQSF', 'PFCQRNG', 'PFCQSNG', + 'PFSQLTUR', 'PFSQITUR', 'PFPLSL', 'PFPLSN', 'PFHPSL', 'PFHPSN', + 'TENDENCY_LOC_A', 'TENDENCY_LOC_Q', 'TENDENCY_LOC_T', + 'TENDENCY_LOC_CLD' ] with h5py.File(path, 'r') as f: From adca039694d4013754bd1583f619d40cc64ceb19 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 28 Apr 2023 13:41:39 +0000 Subject: [PATCH 025/174] Python: Switch to pyproject.toml install configuration This already install `run_f2py.py` into the venv, but not yet `run.py` and `run_split.py` for the GT4Py variants. --- src/cloudsc_python/drivers/cloudsc_f2py.py | 2 +- src/cloudsc_python/pyproject.toml | 67 ++++++++++++++-------- src/cloudsc_python/setup.cfg | 62 -------------------- src/cloudsc_python/setup.py | 21 ------- 4 files changed, 44 insertions(+), 108 deletions(-) delete mode 100644 src/cloudsc_python/setup.cfg delete mode 100644 src/cloudsc_python/setup.py diff --git a/src/cloudsc_python/drivers/cloudsc_f2py.py b/src/cloudsc_python/drivers/cloudsc_f2py.py index 0ca0996b..f9e5a084 100644 --- a/src/cloudsc_python/drivers/cloudsc_f2py.py +++ b/src/cloudsc_python/drivers/cloudsc_f2py.py @@ -118,7 +118,7 @@ def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): '--generate/--no-generate', default=False, help='(Re)generate kernel via Loki-Fortran-Python transform' ) -def dwarf_cloudsc(nthreads, ngptot, nproma, generate): +def main(nthreads, ngptot, nproma, generate): """ Run a Python version of CLOUDSC and validate against reference data """ diff --git a/src/cloudsc_python/pyproject.toml b/src/cloudsc_python/pyproject.toml index b3413f4e..aa29e770 100644 --- a/src/cloudsc_python/pyproject.toml +++ b/src/cloudsc_python/pyproject.toml @@ -1,28 +1,47 @@ [build-system] -requires = ['setuptools>=42', 'wheel'] +requires = ["setuptools >= 64"] +build-backend = "setuptools.build_meta" -[tool.setuptools_scm] +[project] +name = "cloudsc4py" +version = "0.1.0" +authors = [ + {name = "Stefano Ubbiali", email = "subbiali@phys.ethz.ch"}, + {name = "Michael Lange", email = "michael.lange@ecmwf.int"} +] +description = "Collection of Python variants of the CLOUDSC dwarf" +readme = "README.md" +requires-python = ">=3.8" +license = {file = "LICENSE"} +classifiers = [ + " Development Status :: 3 - Alpha ", + " Intended Audience:: Science / Research ", + " License :: OSI Approved:: Apache License, Version 2.0 ", + " Natural Language :: English ", + " Operating System :: POSIX ", + " Programming Language :: Python :: 3.8 ", + " Programming Language :: Python :: 3.9 ", + " Programming Language :: Python :: 3.10 ", + " Programming Language :: Python :: 3.11 ", + " Topic :: Scientific/Engineering :: Atmospheric Science " +] +dependencies = [ + "click", + "gt4py[dace] >= 1.0.1", + "h5py", + "numpy", + "pandas", + "pydantic", + "sympl @ git+https://github.com/stubbiali/sympl.git@oop#egg=sympl", + "xarray", +] -[tool.black] -line-length = 100 -target-version = ['py37', 'py38', 'py39'] -include = '\.pyi?$' -exclude = ''' -/( - \.eggs - | \.git - | \.hg - | \.mypy_cache - | \.tox - | \.venv - | _build - | buck-out - | build - | dist +[project.scripts] +"cloudsc_f2py.py" = "drivers.cloudsc_f2py:main" - # The following are specific to Black, you probably don't want those. - | blib2to3 - | tests/data - | profiling -)/ -''' \ No newline at end of file +[project.urls] +repository = "https://github.com/ecmwf-ifs/dwarf-p-cloudsc" + +[tool.setuptools.packages.find] +where = ["src", "."] +include = ["cloudsc4py", "cloudscf2py", "drivers*"] diff --git a/src/cloudsc_python/setup.cfg b/src/cloudsc_python/setup.cfg deleted file mode 100644 index 6fb86478..00000000 --- a/src/cloudsc_python/setup.cfg +++ /dev/null @@ -1,62 +0,0 @@ -[metadata] -name = cloudsc4py -description = GT4Py-based implementation of the CLOUDSC dwarf -author = ETH Zurich, ECMWF -author_email = subbiali@phys.ethz.ch, michael.lange@ecmwf.int -license = Apache-2.0 -license_file = ../../LICENSE -;long_description = file: ../../README.md -;long_description_content_type = text/markdown -project_urls = - Source = https://github.com/ecmwf-ifs/dwarf-p-cloudsc -platforms = Linux, Mac -classifiers = - Development Status :: 3 - Alpha - Intended Audience:: Science / Research - License :: OSI Approved:: Apache License, Version 2.0 - Natural Language :: English - Operating System :: POSIX - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Topic :: Scientific/Engineering :: Atmospheric Science - -[options] -zip_safe = False -packages = find: -include_package_data = True -python_requires = >= 3.7 -package_dir = - cloudsc4py = src/cloudsc4py - cloudscf2py = src/cloudscf2py -install_requires = - click - gt4py[dace] >= 1.0.1 - h5py - numpy - pandas - pydantic - sympl @ git+https://github.com/stubbiali/sympl.git@oop#egg=sympl - xarray - -[options.packages.find] -where = src -exclude = - data - drivers - scripts - tests - -;[tool:pytest] -;testpaths = tests - -;[build_sphinx] -;source-dir = docs/source -;build-dir = docs/build -;builder = html latexpdf - -[flake8] -exclude = - .eggs - .git -max-line-length = 100 diff --git a/src/cloudsc_python/setup.py b/src/cloudsc_python/setup.py deleted file mode 100644 index 40c9b8da..00000000 --- a/src/cloudsc_python/setup.py +++ /dev/null @@ -1,21 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from setuptools import setup -import sys - - -if sys.version_info.major < 3: - print("Python 3.x is required.") - sys.exit(1) - - -setup(use_scm_version=False) From e66602c51aeedd569ba6bd05fbd95520b6b57be7 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Tue, 2 May 2023 11:07:24 +0000 Subject: [PATCH 026/174] CLOUDSC-Python: Add bundle and CMake install options for cloudscf2py This includes a virtualenv setup, installation via add_custom_command depndency chain and a small ctest runner for the standalone Python variant. --- bundle.yml | 9 +++++ src/CMakeLists.txt | 1 + src/cloudsc_python/CMakeLists.txt | 67 +++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 src/cloudsc_python/CMakeLists.txt diff --git a/bundle.yml b/bundle.yml index 7669d06b..c087d131 100644 --- a/bundle.yml +++ b/bundle.yml @@ -106,6 +106,11 @@ options : help : Frontend parser to use for Loki transformations cmake : LOKI_FRONTEND={{value}} + - with-python : + help : Enable Python variants of CLOUDSC + cmake : > + CLOUDSC_PYTHON_F2PY=ON + - cloudsc-prototype1 : help : Build the original operational Fortran prototype [ON|OFF] cmake : ENABLE_CLOUDSC_PROTOTYPE1={{value}} @@ -142,6 +147,10 @@ options : help : Build the deprecated Loki+CLAW-based GPU version CLOUDSC [ON|OFF] cmake : ENABLE_CLOUDSC_LOKI_CLAW={{value}} + - cloudsc-python-f2py : + help : Enable dedicated pure Python variant of CLOUDSC [ON|OFF] + cmake : ENABLE_CLOUDSC_PYTHON_F2PY={{value}} + - hdf5 : help : Enable use of HDF5 input file [ON|OFF] cmake : ENABLE_HDF5={{value}} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b1513fb3..f6789f08 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(prototype1) add_subdirectory(common) add_subdirectory(cloudsc_fortran) add_subdirectory(cloudsc_pyiface) +add_subdirectory(cloudsc_python) add_subdirectory(cloudsc_c) add_subdirectory(cloudsc_cuda) add_subdirectory(cloudsc_gpu) diff --git a/src/cloudsc_python/CMakeLists.txt b/src/cloudsc_python/CMakeLists.txt new file mode 100644 index 00000000..7e0ffdc1 --- /dev/null +++ b/src/cloudsc_python/CMakeLists.txt @@ -0,0 +1,67 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Define this dwarf variant as an ECBuild feature +ecbuild_add_option( FEATURE CLOUDSC_PYTHON_F2PY + DESCRIPTION "Build the pure Python variant from Loki transpilation" DEFAULT OFF + CONDITION HDF5_FOUND +) + +if( HAVE_CLOUDSC_PYTHON_F2PY ) + + # Utilities to manage Python virtual environments + include( python_venv ) + + # Set up a custom venv for this variant and install the necessary dependencies + set( cloudsc_VENV_PATH ${CMAKE_BINARY_DIR}/venv_cloudsc ) + setup_python_venv( ${cloudsc_VENV_PATH} ) + + # Update to latest pip versionxs + execute_process(COMMAND ${Python3_EXECUTABLE} -m pip install --upgrade pip) + + if( NOT Python3_EXECUTABLE ) + ecbuild_error("[PyIface] Could not find Python3 executable in virtualenv") + endif() + + # Install the "cloudscf2py" Python package and runner in editable mode + add_custom_command( OUTPUT ${Python3_VENV_BIN}/cloudsc_f2py.py + COMMAND COMMAND ${Python3_EXECUTABLE} -m pip install -e ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "[CLOUDSC-Python] Installing cloudscf2py into virtualenv [${cloudsc_VENV_PATH}]" + ) + + # Copy the CLI driver script into the bin directory for execution + add_custom_command( OUTPUT ${CMAKE_BINARY_DIR}/bin/cloudsc_f2py.py + COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E create_symlink ${Python3_VENV_BIN}/cloudsc_f2py.py ${CMAKE_BINARY_DIR}/bin/cloudsc_f2py.py + DEPENDS ${Python3_VENV_BIN}/cloudsc_f2py.py + COMMENT "[CLOUDSC-Python] Linking Python driver scripts from virtualenv [${cloudsc_VENV_PATH}]" + ) + + # Add runner script as a custom executable target + add_custom_target( cloudsc-f2py ALL DEPENDS ${CMAKE_BINARY_DIR}/bin/cloudsc_f2py.py ) + + ecbuild_add_test( + COMMAND bin/cloudsc_f2py.py + ARGS --ngptot=100 --nproma=16 + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + OMP 1 + ) + + + if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 + ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 + ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 + ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 + ) + endif() + +endif() From 06748f32f85f527389baaaead9106ed54f159c39 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Tue, 2 May 2023 11:53:32 +0000 Subject: [PATCH 027/174] CLOUDSC-Python: Add new cloudsc_f2py.py runner to CI pipeline --- .github/scripts/run-targets.sh | 3 +++ .github/scripts/verify-targets.sh | 5 +++++ .github/workflows/build.yml | 11 +++++++++-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh index 1c4c6128..ac802ee2 100755 --- a/.github/scripts/run-targets.sh +++ b/.github/scripts/run-targets.sh @@ -49,6 +49,9 @@ do elif [[ "$target" == "cloudsc_pyiface.py" ]] then bin/$target --numomp 1 --ngptot 100 --nproma 64 + elif [[ "$target" == "cloudsc_f2py.py" ]] + then + bin/$target --nthreads 1 --ngptot 100 --nproma 64 else # Single thread, safe NPROMA bin/$target 1 100 64 diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 9528f65a..7024e32b 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -56,6 +56,11 @@ then targets+=(cloudsc_pyiface.py) fi +if [[ "$python_flag" == "--cloudsc-python-f2py=ON" ]] +then + targets+=(cloudsc_f2py.py) +fi + # # Verify each target exists # diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6c5c310e..fd03258e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ on: jobs: # This workflow contains a single job called "build" build: - name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} ${{ matrix.pyiface_flag }} + name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} ${{ matrix.pyiface_flag }} ${{ matrix.python_flag }} # The type of runner that the job will run on runs-on: ubuntu-20.04 @@ -46,6 +46,8 @@ jobs: pyiface_flag: [''] # Flag to enable Python-interface variant + python_flag: [''] # Flag to enable Python variants + include: # Add nvhpc build configurations with serialbox and HDF5 - arch: github/ubuntu/nvhpc/21.9 @@ -56,6 +58,7 @@ jobs: cuda_flag: '--with-cuda' loki_flag: '--with-loki' pyiface_flag: '' + python_flag: '' - arch: github/ubuntu/nvhpc/21.9 io_library_flag: '--with-serialbox' mpi_flag: '' @@ -64,6 +67,7 @@ jobs: cuda_flag: '--with-cuda' loki_flag: '--with-loki' pyiface_flag: '' + python_flag: '' # Add pyiface build configuration for HDF5 only - arch: github/ubuntu/gnu/9.4.0 io_library_flag: '' @@ -73,6 +77,7 @@ jobs: cuda_flag: '' loki_flag: '' pyiface_flag: '--cloudsc-fortran-pyiface=ON' + python_flag: '--cloudsc-python-f2py=ON' # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -118,7 +123,8 @@ jobs: ./cloudsc-bundle build --retry-verbose \ --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \ ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \ - ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} ${{ matrix.pyiface_flag }} + ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} \ + ${{ matrix.pyiface_flag }} ${{ matrix.python_flag }} # Verify targets exist - name: Verify targets @@ -130,6 +136,7 @@ jobs: loki_flag: ${{ matrix.loki_flag }} claw_flag: ${{ matrix.claw_flag }} pyiface_flag: ${{ matrix.pyiface_flag }} + python_flag: ${{ matrix.python_flag }} run: .github/scripts/verify-targets.sh # Run double-precision targets From d943e7f125614403497d642cc8f4c978725ea985 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 9 May 2023 00:17:22 +0200 Subject: [PATCH 028/174] using Atlas block view on FieldSet --- .../cloudsc_driver_mod.F90 | 204 +++++------ .../cloudsc_global_atlas_state_mod.F90 | 331 ++++++++++-------- .../dwarf_cloudsc_atlas.F90 | 37 +- .../expand_atlas_mod.F90 | 14 +- .../validate_atlas_mod.F90 | 2 +- 5 files changed, 304 insertions(+), 284 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index 25b22254..8a84c043 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -14,84 +14,79 @@ MODULE CLOUDSC_DRIVER_MOD USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM USE EC_PMON_MOD, ONLY: EC_PMON + USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW + + USE ATLAS_MODULE + USE, INTRINSIC :: ISO_C_BINDING + USE ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS_MODULE IMPLICIT NONE CONTAINS - SUBROUTINE CLOUDSC_DRIVER( & - & NUMOMP, NPROMA, NLEV, NGPTOT, NGPTOTG, KFLDX, PTSPHY, & - & PT, PQ, TENDENCY_CML, TENDENCY_TMP, TENDENCY_LOC, & - & PVFA, PVFL, PVFI, PDYNA, PDYNL, PDYNI, & - & PHRSW, PHRLW, & - & PVERVEL, PAP, PAPH, & - & PLSM, LDCUM, KTYPE, & - & PLU, PLUDE, PSNDE, PMFU, PMFD, & - & PA, PCLV, PSUPSAT,& - & PLCRIT_AER,PICRIT_AER, PRE_ICE, & - & PCCN, PNICE,& - & PCOVPTOT, PRAINFRAC_TOPRFZ, & - & PFSQLF, PFSQIF , PFCQNNG, PFCQLNG, & - & PFSQRF, PFSQSF , PFCQRNG, PFCQSNG, & - & PFSQLTUR, PFSQITUR, & - & PFPLSL, PFPLSN, PFHPSL, PFHPSN & - & ) + SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) + ! Driver routine that performans the parallel NPROMA-blocking and ! invokes the CLOUDSC kernel - INTEGER(KIND=JPIM), INTENT(IN) :: NUMOMP, NPROMA, NLEV, NGPTOT, NGPTOTG - INTEGER(KIND=JPIM), INTENT(IN) :: KFLDX - REAL(KIND=JPRB), INTENT(IN) :: PTSPHY ! Physics timestep - REAL(KIND=JPRB), INTENT(IN) :: PT(:,:,:) ! T at start of callpar - REAL(KIND=JPRB), INTENT(IN) :: PQ(:,:,:) ! Q at start of callpar - TYPE(STATE_TYPE), INTENT(IN) :: TENDENCY_CML(:) ! cumulative tendency used for final output - TYPE(STATE_TYPE), INTENT(IN) :: TENDENCY_TMP(:) ! cumulative tendency used as input - TYPE(STATE_TYPE), INTENT(OUT) :: TENDENCY_LOC(:) ! local tendency from cloud scheme - REAL(KIND=JPRB), INTENT(IN) :: PVFA(:,:,:) ! CC from VDF scheme - REAL(KIND=JPRB), INTENT(IN) :: PVFL(:,:,:) ! Liq from VDF scheme - REAL(KIND=JPRB), INTENT(IN) :: PVFI(:,:,:) ! Ice from VDF scheme - REAL(KIND=JPRB), INTENT(IN) :: PDYNA(:,:,:) ! CC from Dynamics - REAL(KIND=JPRB), INTENT(IN) :: PDYNL(:,:,:) ! Liq from Dynamics - REAL(KIND=JPRB), INTENT(IN) :: PDYNI(:,:,:) ! Liq from Dynamics - REAL(KIND=JPRB), INTENT(IN) :: PHRSW(:,:,:) ! Short-wave heating rate - REAL(KIND=JPRB), INTENT(IN) :: PHRLW(:,:,:) ! Long-wave heating rate - REAL(KIND=JPRB), INTENT(IN) :: PVERVEL(:,:,:) !Vertical velocity - REAL(KIND=JPRB), INTENT(IN) :: PAP(:,:,:) ! Pressure on full levels - REAL(KIND=JPRB), INTENT(IN) :: PAPH(:,:,:) ! Pressure on half levels - REAL(KIND=JPRB), INTENT(IN) :: PLSM(:,:) ! Land fraction (0-1) - LOGICAL , INTENT(IN) :: LDCUM(:,:) ! Convection active - INTEGER(KIND=JPIM), INTENT(IN) :: KTYPE(:,:) ! Convection type 0,1,2 - REAL(KIND=JPRB), INTENT(IN) :: PLU(:,:,:) ! Conv. condensate - REAL(KIND=JPRB), INTENT(INOUT) :: PLUDE(:,:,:) ! Conv. detrained water - REAL(KIND=JPRB), INTENT(IN) :: PSNDE(:,:,:) ! Conv. detrained snow - REAL(KIND=JPRB), INTENT(IN) :: PMFU(:,:,:) ! Conv. mass flux up - REAL(KIND=JPRB), INTENT(IN) :: PMFD(:,:,:) ! Conv. mass flux down - REAL(KIND=JPRB), INTENT(IN) :: PA(:,:,:) ! Original Cloud fraction (t) - REAL(KIND=JPRB), INTENT(IN) :: PCLV(:,:,:,:) - REAL(KIND=JPRB), INTENT(IN) :: PSUPSAT(:,:,:) - REAL(KIND=JPRB), INTENT(IN) :: PLCRIT_AER(:,:,:) - REAL(KIND=JPRB), INTENT(IN) :: PICRIT_AER(:,:,:) - REAL(KIND=JPRB), INTENT(IN) :: PRE_ICE(:,:,:) - REAL(KIND=JPRB), INTENT(IN) :: PCCN(:,:,:) ! liquid cloud condensation nuclei - REAL(KIND=JPRB), INTENT(IN) :: PNICE(:,:,:) ! ice number concentration (cf. CCN) - - REAL(KIND=JPRB), INTENT(INOUT) :: PCOVPTOT(:,:,:) ! Precip fraction - REAL(KIND=JPRB), INTENT(OUT) :: PRAINFRAC_TOPRFZ(:,:) + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + INTEGER(KIND=JPIM), INTENT(IN) :: NUMOMP, NGPTOT, NGPTOTG, KFLDX + REAL(KIND=JPRB), INTENT(IN) :: PTSPHY ! Physics timestep + + TYPE(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW) :: FBLOCK + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE + TYPE(ATLAS_FIELD) :: FIELD + INTEGER(KIND=JPIM) :: NPROMA, NLEV + REAL(KIND=JPRB), POINTER :: PT(:,:) ! T at start of callpar + REAL(KIND=JPRB), POINTER :: PQ(:,:) ! Q at start of callpar + TYPE(STATE_TYPE), POINTER :: TENDENCY_CML(:) ! cumulative tendency used for final output + TYPE(STATE_TYPE), POINTER :: TENDENCY_TMP(:) ! cumulative tendency used as input + TYPE(STATE_TYPE), POINTER :: TENDENCY_LOC(:) ! local tendency from cloud scheme + REAL(KIND=JPRB), POINTER:: PVFA(:,:) ! CC from VDF scheme + REAL(KIND=JPRB), POINTER:: PVFL(:,:) ! Liq from VDF scheme + REAL(KIND=JPRB), POINTER:: PVFI(:,:) ! Ice from VDF scheme + REAL(KIND=JPRB), POINTER:: PDYNA(:,:) ! CC from Dynamics + REAL(KIND=JPRB), POINTER:: PDYNL(:,:) ! Liq from Dynamics + REAL(KIND=JPRB), POINTER:: PDYNI(:,:) ! Liq from Dynamics + REAL(KIND=JPRB), POINTER:: PHRSW(:,:) ! Short-wave heating rate + REAL(KIND=JPRB), POINTER:: PHRLW(:,:) ! Long-wave heating rate + REAL(KIND=JPRB), POINTER:: PVERVEL(:,:) !Vertical velocity + REAL(KIND=JPRB), POINTER:: PAP(:,:) ! Pressure on full levels + REAL(KIND=JPRB), POINTER:: PAPH(:,:) ! Pressure on half levels + REAL(KIND=JPRB), POINTER:: PLSM(:) ! Land fraction (0-1) + LOGICAL, POINTER :: LDCUM(:) ! Convection active + INTEGER(KIND=JPIM), POINTER :: KTYPE(:) ! Convection type 0,1,2 + REAL(KIND=JPRB), POINTER:: PLU(:,:) ! Conv. condensate + REAL(KIND=JPRB), POINTER:: PLUDE(:,:) ! Conv. detrained water + REAL(KIND=JPRB), POINTER:: PSNDE(:,:) ! Conv. detrained snow + REAL(KIND=JPRB), POINTER:: PMFU(:,:) ! Conv. mass flux up + REAL(KIND=JPRB), POINTER:: PMFD(:,:) ! Conv. mass flux down + REAL(KIND=JPRB), POINTER:: PA(:,:) ! Original Cloud fraction (t) + REAL(KIND=JPRB), POINTER:: PCLV(:,:,:) + REAL(KIND=JPRB), POINTER:: PSUPSAT(:,:) + REAL(KIND=JPRB), POINTER:: PLCRIT_AER(:,:) + REAL(KIND=JPRB), POINTER:: PICRIT_AER(:,:) + REAL(KIND=JPRB), POINTER:: PRE_ICE(:,:) + REAL(KIND=JPRB), POINTER:: PCCN(:,:) ! liquid cloud condensation nuclei + REAL(KIND=JPRB), POINTER:: PNICE(:,:) ! ice number concentration (cf. CCN) + + REAL(KIND=JPRB), POINTER:: PCOVPTOT(:,:) ! Precip fraction + REAL(KIND=JPRB), POINTER:: PRAINFRAC_TOPRFZ(:) ! Flux diagnostics for DDH budget - REAL(KIND=JPRB), INTENT(OUT) :: PFSQLF(:,:,:) ! Flux of liquid - REAL(KIND=JPRB), INTENT(OUT) :: PFSQIF(:,:,:) ! Flux of ice - REAL(KIND=JPRB), INTENT(OUT) :: PFCQLNG(:,:,:) ! -ve corr for liq - REAL(KIND=JPRB), INTENT(OUT) :: PFCQNNG(:,:,:) ! -ve corr for ice - REAL(KIND=JPRB), INTENT(OUT) :: PFSQRF(:,:,:) ! Flux diagnostics - REAL(KIND=JPRB), INTENT(OUT) :: PFSQSF(:,:,:) ! for DDH, generic - REAL(KIND=JPRB), INTENT(OUT) :: PFCQRNG(:,:,:) ! rain - REAL(KIND=JPRB), INTENT(OUT) :: PFCQSNG(:,:,:) ! snow - REAL(KIND=JPRB), INTENT(OUT) :: PFSQLTUR(:,:,:) ! liquid flux due to VDF - REAL(KIND=JPRB), INTENT(OUT) :: PFSQITUR(:,:,:) ! ice flux due to VDF - REAL(KIND=JPRB), INTENT(OUT) :: PFPLSL(:,:,:) ! liq+rain sedim flux - REAL(KIND=JPRB), INTENT(OUT) :: PFPLSN(:,:,:) ! ice+snow sedim flux - REAL(KIND=JPRB), INTENT(OUT) :: PFHPSL(:,:,:) ! Enthalpy flux for liq - REAL(KIND=JPRB), INTENT(OUT) :: PFHPSN(:,:,:) ! Enthalp flux for ice + REAL(KIND=JPRB), POINTER :: PFSQLF(:,:) ! Flux of liquid + REAL(KIND=JPRB), POINTER :: PFSQIF(:,:) ! Flux of ice + REAL(KIND=JPRB), POINTER :: PFCQLNG(:,:) ! -ve corr for liq + REAL(KIND=JPRB), POINTER :: PFCQNNG(:,:) ! -ve corr for ice + REAL(KIND=JPRB), POINTER :: PFSQRF(:,:) ! Flux diagnostics + REAL(KIND=JPRB), POINTER :: PFSQSF(:,:) ! for DDH, generic + REAL(KIND=JPRB), POINTER :: PFCQRNG(:,:) ! rain + REAL(KIND=JPRB), POINTER :: PFCQSNG(:,:) ! snow + REAL(KIND=JPRB), POINTER :: PFSQLTUR(:,:) ! liquid flux due to VDF + REAL(KIND=JPRB), POINTER :: PFSQITUR(:,:) ! ice flux due to VDF + REAL(KIND=JPRB), POINTER :: PFPLSL(:,:) ! liq+rain sedim flux + REAL(KIND=JPRB), POINTER :: PFPLSN(:,:) ! ice+snow sedim flux + REAL(KIND=JPRB), POINTER :: PFHPSL(:,:) ! Enthalpy flux for liq + REAL(KIND=JPRB), POINTER :: PFHPSN(:,:) ! Enthalp flux for ice INTEGER(KIND=JPIM) :: JKGLO,IBL,ICEND,NGPBLKS @@ -108,6 +103,12 @@ SUBROUTINE CLOUDSC_DRIVER( & POWER_TOTAL = 0_JPIB POWER_COUNT = 0_JPIB + FIELD = FSET%FIELD("PEXTRA") + FSPACE = FIELD%FUNCTIONSPACE() + NPROMA = FSPACE%BLOCK_SIZE(1) + NLEV = FSPACE%LEVELS() + print *, "NPROMA, NUMOMP ", NPROMA, NUMOMP + NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) 1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) if (irank == 0) then @@ -129,46 +130,49 @@ SUBROUTINE CLOUDSC_DRIVER( & IBL=(JKGLO-1)/NPROMA+1 ICEND=MIN(NPROMA,NGPTOT-JKGLO+1) + ! get block views + call FBLOCK%GET_BLOCK(FSET, IBL) + !-- These were uninitialized : meaningful only when we compare error differences - PCOVPTOT(:,:,IBL) = 0.0_JPRB - TENDENCY_LOC(IBL)%cld(:,:,NCLV) = 0.0_JPRB - - !--- a future plan to replace the call to CLOUDSC ------ - ! - ! type( block_state_t ) - ! real(c_double), pointer :: PT(:,:) - ! type(state_type) :: tendency_LOC - ! type(state_type) :: tendency_TMP - ! type(state_type) :: tendency_CML - ! end type - - ! call extract_block( FSET, IBL, config, block_state ) - ! call FSET%FIELD("PT")%BLOCK_DATA(IBL,PT,CONFIG) - ! call FSET%FIELD("PQ")%BLOCK_DATA(IBL,PQ,CONFIG) - ! call cloudsc_atlas ( FSET, IBL, config ) + FBLOCK%PCOVPTOT(:,:) = 0.0_JPRB + FBLOCK%TENDENCY_LOC%cld(:,:,NCLV) = 0.0_JPRB + + !--- a future plan to replace the call to CLOUDSC ------ + ! + ! type( block_state_t ) + ! real(c_double), pointer :: PT(:,:) + ! type(state_type) :: tendency_LOC + ! type(state_type) :: tendency_TMP + ! type(state_type) :: tendency_CML + ! end type + ! call extract_block( FSET, IBL, config, block_state ) + ! call FSET%FIELD("PT")%BLOCK_DATA(IBL,PT,CONFIG) + ! call FSET%FIELD("PQ")%BLOCK_DATA(IBL,PQ,CONFIG) + ! call cloudsc_atlas ( FSET, IBL, config ) CALL CLOUDSC & & ( 1, ICEND, NPROMA, NLEV,& & PTSPHY,& - & PT(:,:,IBL), PQ(:,:,IBL), TENDENCY_CML(IBL), TENDENCY_TMP(IBL), TENDENCY_LOC(IBL), & - & PVFA(:,:,IBL), PVFL(:,:,IBL), PVFI(:,:,IBL), PDYNA(:,:,IBL), PDYNL(:,:,IBL), PDYNI(:,:,IBL), & - & PHRSW(:,:,IBL), PHRLW(:,:,IBL),& - & PVERVEL(:,:,IBL), PAP(:,:,IBL), PAPH(:,:,IBL),& - & PLSM(:,IBL), LDCUM(:,IBL), KTYPE(:,IBL), & - & PLU(:,:,IBL), PLUDE(:,:,IBL), PSNDE(:,:,IBL), PMFU(:,:,IBL), PMFD(:,:,IBL),& + & FBLOCK%PT, FBLOCK%PQ, FBLOCK%TENDENCY_CML, FBLOCK%TENDENCY_TMP, FBLOCK%TENDENCY_LOC, & + & FBLOCK%PVFA, FBLOCK%PVFL, FBLOCK%PVFI, FBLOCK%PDYNA, FBLOCK%PDYNL, FBLOCK%PDYNI, & + & FBLOCK%PHRSW, FBLOCK%PHRLW,& + & FBLOCK%PVERVEL, FBLOCK%PAP, FBLOCK%PAPH,& + & FBLOCK%PLSM, FBLOCK%LDCUM, FBLOCK%KTYPE, & + & FBLOCK%PLU, FBLOCK%PLUDE, & + & FBLOCK%PSNDE, FBLOCK%PMFU, FBLOCK%PMFD,& !---prognostic fields - & PA(:,:,IBL), PCLV(:,:,:,IBL), PSUPSAT(:,:,IBL),& + & FBLOCK%PA, FBLOCK%PCLV, FBLOCK%PSUPSAT,& !-- arrays for aerosol-cloud interactions - & PLCRIT_AER(:,:,IBL),PICRIT_AER(:,:,IBL),& - & PRE_ICE(:,:,IBL),& - & PCCN(:,:,IBL), PNICE(:,:,IBL),& + & FBLOCK%PLCRIT_AER, FBLOCK%PICRIT_AER,& + & FBLOCK%PRE_ICE,& + & FBLOCK%PCCN, FBLOCK%PNICE,& !---diagnostic output - & PCOVPTOT(:,:,IBL), PRAINFRAC_TOPRFZ(:,IBL),& + & FBLOCK%PCOVPTOT, FBLOCK%PRAINFRAC_TOPRFZ,& !---resulting fluxes - & PFSQLF(:,:,IBL), PFSQIF (:,:,IBL), PFCQNNG(:,:,IBL), PFCQLNG(:,:,IBL),& - & PFSQRF(:,:,IBL), PFSQSF (:,:,IBL), PFCQRNG(:,:,IBL), PFCQSNG(:,:,IBL),& - & PFSQLTUR(:,:,IBL), PFSQITUR (:,:,IBL), & - & PFPLSL(:,:,IBL), PFPLSN(:,:,IBL), PFHPSL(:,:,IBL), PFHPSN(:,:,IBL),& + & FBLOCK%PFSQLF, FBLOCK%PFSQIF, FBLOCK%PFCQNNG, FBLOCK%PFCQLNG,& + & FBLOCK%PFSQRF, FBLOCK%PFSQSF, FBLOCK%PFCQRNG, FBLOCK%PFCQSNG,& + & FBLOCK%PFSQLTUR, FBLOCK%PFSQITUR, & + & FBLOCK%PFPLSL, FBLOCK%PFPLSN, FBLOCK%PFHPSL, FBLOCK%PFHPSN,& & KFLDX) !--- end of a future plan to replace the call to CLOUDSC ------ diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index e210b165..713e833d 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -27,13 +27,12 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD IMPLICIT NONE - TYPE(ATLAS_STRUCTUREDGRID) :: GRID - TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE - TYPE(ATLAS_FIELDSET) :: FSET - TYPE VAR3D_PTR REAL(C_DOUBLE), POINTER :: PTR(:,:,:) END TYPE + TYPE VAR2D_PTR + REAL(C_DOUBLE), POINTER :: PTR(:,:) + END TYPE CHARACTER(LEN=10), PARAMETER, DIMENSION(30) :: IN_VAR_NAMES = (/ & "PLCRIT_AER", "PICRIT_AER", "PRE_ICE ", "PCCN ", "PNICE ", "PT ", "PQ ", & @@ -47,6 +46,68 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD "PFPLSL ", "PFPLSN ", "PFHPSL ", "PFHPSN ", "PCOVPTOT ", & "PRAINFRAC_TOPRFZ" /) + TYPE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW + TYPE(VAR2D_PTR), DIMENSION(24) :: IN_VARS_2D_REAL64 + TYPE(VAR2D_PTR), DIMENSION(15) :: OUT_VARS_2D_REAL64 + + ! Input field variables and tendencies + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLCRIT_AER(:,:) + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PICRIT_AER(:,:) + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRE_ICE(:,:) + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCCN(:,:) ! liquid cloud condensation nuclei + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PNICE(:,:) ! ice number concentration (cf. CCN) + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PT(:,:) ! T at start of callpar + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PQ(:,:) ! Q at start of callpar + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFA(:,:) ! CC from VDF scheme + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFL(:,:) ! Liq from VDF scheme + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFI(:,:) ! Ice from VDF scheme + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNA(:,:) ! CC from Dynamics + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNL(:,:) ! Liq from Dynamics + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNI(:,:) ! Liq from Dynamics + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PHRSW(:,:) ! Short-wave heating rate + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PHRLW(:,:) ! Long-wave heating rate + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVERVEL(:,:) ! Vertical velocity + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAP(:,:) ! Pressure on full levels + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLU(:,:) ! Conv. condensate + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLUDE(:,:) ! Conv. detrained water + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PSNDE(:,:) ! Conv. detrained snow + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PMFU(:,:) ! Conv. mass flux up + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PMFD(:,:) ! Conv. mass flux down + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PA(:,:) ! Original Cloud fraction (t) + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PSUPSAT(:,:) + + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLSM(:) ! Land fraction (0-1) + LOGICAL, POINTER, CONTIGUOUS :: LDCUM(:) ! Convection active + INTEGER, POINTER, CONTIGUOUS :: KTYPE(:) ! Convection type 0,1,2 + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PEXTRA(:,:,:) ! extra fields + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCLV(:,:,:) + + TYPE(STATE_TYPE) :: TENDENCY_CML ! cumulative tendency used for final output + TYPE(STATE_TYPE) :: TENDENCY_TMP ! cumulative tendency used as input + TYPE(STATE_TYPE) :: TENDENCY_LOC ! local tendency from cloud scheme + + ! Output fields used for validation + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLF(:,:) ! Flux of liquid + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQIF(:,:) ! Flux of ice + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQLNG(:,:) ! -ve corr for liq + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQNNG(:,:) ! -ve corr for ice + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQRF(:,:) ! Flux diagnostics + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQSF(:,:) ! for DDH, generic + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQRNG(:,:) ! rain + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQSNG(:,:) ! snow + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLTUR(:,:) ! liquid flux due to VDF + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQITUR(:,:) ! ice flux due to VDF + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFPLSL(:,:) ! liq+rain sedim flux + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFPLSN(:,:) ! ice+snow sedim flux + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSL(:,:) ! Enthalpy flux for liq + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSN(:,:) ! Enthalpy flux for ice + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCOVPTOT(:,:) ! Precip fraction + REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRAINFRAC_TOPRFZ(:) + CONTAINS + PROCEDURE :: GET_BLOCK => CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK + END TYPE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW + TYPE CLOUDSC_GLOBAL_ATLAS_STATE ! Memory state containing raw fields annd tendencies for CLOUDSC dwarf ! @@ -59,68 +120,6 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD LOGICAL :: LDMAINCALL ! T if main call to cloudsc REAL(KIND=JPRB) :: PTSPHY ! Physics timestep - TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 - TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 - - ! Input field variables and tendencies - REAL(C_DOUBLE), POINTER :: PLCRIT_AER(:,:,:) - REAL(C_DOUBLE), POINTER :: PICRIT_AER(:,:,:) - REAL(C_DOUBLE), POINTER :: PRE_ICE(:,:,:) - REAL(C_DOUBLE), POINTER :: PCCN(:,:,:) ! liquid cloud condensation nuclei - REAL(C_DOUBLE), POINTER :: PNICE(:,:,:) ! ice number concentration (cf. CCN) - REAL(C_DOUBLE), POINTER :: PT(:,:,:) ! T at start of callpar - REAL(C_DOUBLE), POINTER :: PQ(:,:,:) ! Q at start of callpar - REAL(C_DOUBLE), POINTER :: PVFA(:,:,:) ! CC from VDF scheme - REAL(C_DOUBLE), POINTER :: PVFL(:,:,:) ! Liq from VDF scheme - REAL(C_DOUBLE), POINTER :: PVFI(:,:,:) ! Ice from VDF scheme - REAL(C_DOUBLE), POINTER :: PDYNA(:,:,:) ! CC from Dynamics - REAL(C_DOUBLE), POINTER :: PDYNL(:,:,:) ! Liq from Dynamics - REAL(C_DOUBLE), POINTER :: PDYNI(:,:,:) ! Liq from Dynamics - REAL(C_DOUBLE), POINTER :: PHRSW(:,:,:) ! Short-wave heating rate - REAL(C_DOUBLE), POINTER :: PHRLW(:,:,:) ! Long-wave heating rate - REAL(C_DOUBLE), POINTER :: PVERVEL(:,:,:) ! Vertical velocity - REAL(C_DOUBLE), POINTER :: PAP(:,:,:) ! Pressure on full levels - REAL(C_DOUBLE), POINTER :: PLU(:,:,:) ! Conv. condensate - REAL(C_DOUBLE), POINTER :: PLUDE(:,:,:) ! Conv. detrained water - REAL(C_DOUBLE), POINTER :: PSNDE(:,:,:) ! Conv. detrained snow - REAL(C_DOUBLE), POINTER :: PMFU(:,:,:) ! Conv. mass flux up - REAL(C_DOUBLE), POINTER :: PMFD(:,:,:) ! Conv. mass flux down - REAL(C_DOUBLE), POINTER :: PA(:,:,:) ! Original Cloud fraction (t) - REAL(C_DOUBLE), POINTER :: PSUPSAT(:,:,:) - - REAL(C_DOUBLE), POINTER :: PLSM(:,:) ! Land fraction (0-1) - LOGICAL, POINTER :: LDCUM(:,:) ! Convection active - INTEGER(c_int), POINTER :: KTYPE(:,:) ! Convection type 0,1,2 - REAL(C_DOUBLE), POINTER :: PAPH(:,:,:) ! Pressure on half levels - REAL(C_DOUBLE), POINTER :: PEXTRA(:,:,:,:) ! extra fields - REAL(C_DOUBLE), POINTER :: PCLV(:,:,:,:) - - TYPE(STATE_TYPE), ALLOCATABLE :: TENDENCY_CML(:) ! cumulative tendency used for final output - TYPE(STATE_TYPE), ALLOCATABLE :: TENDENCY_TMP(:) ! cumulative tendency used as input - TYPE(STATE_TYPE), ALLOCATABLE :: TENDENCY_LOC(:) ! local tendency from cloud scheme - - ! Output fields used for validation - REAL(C_DOUBLE), POINTER :: PFSQLF(:,:,:) ! Flux of liquid - REAL(C_DOUBLE), POINTER :: PFSQIF(:,:,:) ! Flux of ice - REAL(C_DOUBLE), POINTER :: PFCQLNG(:,:,:) ! -ve corr for liq - REAL(C_DOUBLE), POINTER :: PFCQNNG(:,:,:) ! -ve corr for ice - REAL(C_DOUBLE), POINTER :: PFSQRF(:,:,:) ! Flux diagnostics - REAL(C_DOUBLE), POINTER :: PFSQSF(:,:,:) ! for DDH, generic - REAL(C_DOUBLE), POINTER :: PFCQRNG(:,:,:) ! rain - REAL(C_DOUBLE), POINTER :: PFCQSNG(:,:,:) ! snow - REAL(C_DOUBLE), POINTER :: PFSQLTUR(:,:,:) ! liquid flux due to VDF - REAL(C_DOUBLE), POINTER :: PFSQITUR(:,:,:) ! ice flux due to VDF - REAL(C_DOUBLE), POINTER :: PFPLSL(:,:,:) ! liq+rain sedim flux - REAL(C_DOUBLE), POINTER :: PFPLSN(:,:,:) ! ice+snow sedim flux - REAL(C_DOUBLE), POINTER :: PFHPSL(:,:,:) ! Enthalpy flux for liq - REAL(C_DOUBLE), POINTER :: PFHPSN(:,:,:) ! Enthalpy flux for ice - REAL(C_DOUBLE), POINTER :: PCOVPTOT(:,:,:) ! Precip fraction - REAL(C_DOUBLE), POINTER :: PRAINFRAC_TOPRFZ(:,:) - - ! Underlying data buffers for AOSOA allcoated STATE_TYPE arrays - REAL(C_DOUBLE), POINTER :: B_CML(:,:,:,:) - REAL(C_DOUBLE), POINTER :: B_TMP(:,:,:,:) - REAL(C_DOUBLE), POINTER :: B_LOC(:,:,:,:) CONTAINS PROCEDURE :: LOAD => CLOUDSC_GLOBAL_ATLAS_STATE_LOAD PROCEDURE :: VALIDATE => CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE @@ -128,13 +127,117 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD CONTAINS - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) + ! Load reference input data via serialbox + CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW), INTENT(INOUT) :: SELF + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + INTEGER, INTENT(IN) :: IBLK + + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE + INTEGER(KIND=JPIM) :: KLON, IVAR, B + TYPE(ATLAS_FIELD) :: FIELD + REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:), TMP2D(:,:) + + ! NOTE the last six input variables need special treatment - different types + DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 + FIELD = FSET%FIELD(TRIM(IN_VAR_NAMES(IVAR))) + CALL FIELD%DATA(SELF%IN_VARS_2D_REAL64(IVAR)%PTR, IBLK) + ENDDO + SELF%PLCRIT_AER => SELF%IN_VARS_2D_REAL64(1)%PTR + SELF%PICRIT_AER => SELF%IN_VARS_2D_REAL64(2)%PTR + SELF%PRE_ICE => SELF%IN_VARS_2D_REAL64(3)%PTR + SELF%PCCN => SELF%IN_VARS_2D_REAL64(4)%PTR + SELF%PNICE => SELF%IN_VARS_2D_REAL64(5)%PTR + SELF%PT => SELF%IN_VARS_2D_REAL64(6)%PTR + SELF%PQ => SELF%IN_VARS_2D_REAL64(7)%PTR + SELF%PVFA => SELF%IN_VARS_2D_REAL64(8)%PTR + SELF%PVFL => SELF%IN_VARS_2D_REAL64(9)%PTR + SELF%PVFI => SELF%IN_VARS_2D_REAL64(10)%PTR + SELF%PDYNA => SELF%IN_VARS_2D_REAL64(11)%PTR + SELF%PDYNL => SELF%IN_VARS_2D_REAL64(12)%PTR + SELF%PDYNI => SELF%IN_VARS_2D_REAL64(13)%PTR + SELF%PHRSW => SELF%IN_VARS_2D_REAL64(14)%PTR + SELF%PHRLW => SELF%IN_VARS_2D_REAL64(15)%PTR + SELF%PVERVEL => SELF%IN_VARS_2D_REAL64(16)%PTR + SELF%PAP => SELF%IN_VARS_2D_REAL64(17)%PTR + SELF%PLU => SELF%IN_VARS_2D_REAL64(18)%PTR + SELF%PLUDE => SELF%IN_VARS_2D_REAL64(19)%PTR + SELF%PSNDE => SELF%IN_VARS_2D_REAL64(20)%PTR + SELF%PMFU => SELF%IN_VARS_2D_REAL64(21)%PTR + SELF%PMFD => SELF%IN_VARS_2D_REAL64(22)%PTR + SELF%PA => SELF%IN_VARS_2D_REAL64(23)%PTR + SELF%PSUPSAT => SELF%IN_VARS_2D_REAL64(24)%PTR + + FIELD = FSET%FIELD("PLSM") + CALL FIELD%DATA(SELF%PLSM, IBLK) + FIELD = FSET%FIELD("LDCUM") + CALL FIELD%DATA(SELF%LDCUM, IBLK) + FIELD = FSET%FIELD("KTYPE") + CALL FIELD%DATA(SELF%KTYPE, IBLK) + FIELD = FSET%FIELD("PAPH") + CALL FIELD%DATA(SELF%PAPH, IBLK) + FIELD = FSET%FIELD("PEXTRA") + CALL FIELD%DATA(SELF%PEXTRA, IBLK) + FIELD = FSET%FIELD("PCLV") + CALL FIELD%DATA(SELF%PCLV, IBLK) + + FIELD = FSET%FIELD('TENDENCY_CML') + CALL FIELD%DATA(TMP3D, IBLK) + SELF%TENDENCY_CML%T => TMP3D(:,:,1) + SELF%TENDENCY_CML%A => TMP3D(:,:,2) + SELF%TENDENCY_CML%Q => TMP3D(:,:,3) + SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) + FIELD = FSET%FIELD('TENDENCY_TMP') + CALL FIELD%DATA(TMP3D, IBLK) + SELF%TENDENCY_TMP%T => TMP3D(:,:,1) + SELF%TENDENCY_TMP%A => TMP3D(:,:,2) + SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) + SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) + FIELD = FSET%FIELD('TENDENCY_LOC') + CALL FIELD%DATA(TMP3D, IBLK) + SELF%TENDENCY_LOC%T => TMP3D(:,:,1) + SELF%TENDENCY_LOC%A => TMP3D(:,:,2) + SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) + SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) + + DO IVAR = 1, SIZE(SELF%OUT_VARS_2D_REAL64) + FIELD = FSET%FIELD(TRIM(OUT_VAR_NAMES(IVAR))) + CALL FIELD%DATA(SELF%OUT_VARS_2D_REAL64(IVAR)%PTR, IBLK) + ENDDO + SELF%PFSQLF => SELF%OUT_VARS_2D_REAL64(1)%PTR + SELF%PFSQIF => SELF%OUT_VARS_2D_REAL64(2)%PTR + SELF%PFCQLNG => SELF%OUT_VARS_2D_REAL64(3)%PTR + SELF%PFCQNNG => SELF%OUT_VARS_2D_REAL64(4)%PTR + SELF%PFSQRF => SELF%OUT_VARS_2D_REAL64(5)%PTR + SELF%PFSQSF => SELF%OUT_VARS_2D_REAL64(6)%PTR + SELF%PFCQRNG => SELF%OUT_VARS_2D_REAL64(7)%PTR + SELF%PFCQSNG => SELF%OUT_VARS_2D_REAL64(8)%PTR + SELF%PFSQLTUR => SELF%OUT_VARS_2D_REAL64(9)%PTR + SELF%PFSQITUR => SELF%OUT_VARS_2D_REAL64(10)%PTR + SELF%PFPLSL => SELF%OUT_VARS_2D_REAL64(11)%PTR + SELF%PFPLSN => SELF%OUT_VARS_2D_REAL64(12)%PTR + SELF%PFHPSL => SELF%OUT_VARS_2D_REAL64(13)%PTR + SELF%PFHPSN => SELF%OUT_VARS_2D_REAL64(14)%PTR + SELF%PCOVPTOT => SELF%OUT_VARS_2D_REAL64(15)%PTR + + FIELD = FSET%FIELD('PRAINFRAC_TOPRFZ') + CALL FIELD%DATA(SELF%PRAINFRAC_TOPRFZ, IBLK) + END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK + + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) ! Load reference input data via serialbox CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NGPTOT INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG + TYPE(ATLAS_STRUCTUREDGRID) :: GRID + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE INTEGER(KIND=JPIM) :: KLON, IVAR, B + TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 + TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 + REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) + REAL(C_DOUBLE), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD CALL INPUT_INITIALIZE(NAME='input') @@ -146,7 +249,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG) GRID = ATLAS_REGULARLONLATGRID(NGPTOT, 1) FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) SELF%NBLOCKS = FSPACE%NBLKS() - FSET = ATLAS_FIELDSET(); + FSET = ATLAS_FIELDSET() DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 ! last six variables are special CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(IN_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) @@ -158,76 +261,21 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,SELF%KFLDX))) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCLV", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,NCLV))) - DO IVAR = 1, SIZE(SELF%IN_VARS_3D_REAL64) - FIELD = FSET%FIELD(TRIM(IN_VAR_NAMES(IVAR))) - CALL FIELD%DATA(SELF%IN_VARS_3D_REAL64(IVAR)%PTR) - ENDDO - SELF%PLCRIT_AER => SELF%IN_VARS_3D_REAL64(1)%PTR - SELF%PICRIT_AER => SELF%IN_VARS_3D_REAL64(2)%PTR - SELF%PRE_ICE => SELF%IN_VARS_3D_REAL64(3)%PTR - SELF%PCCN => SELF%IN_VARS_3D_REAL64(4)%PTR - SELF%PNICE => SELF%IN_VARS_3D_REAL64(5)%PTR - SELF%PT => SELF%IN_VARS_3D_REAL64(6)%PTR - SELF%PQ => SELF%IN_VARS_3D_REAL64(7)%PTR - SELF%PVFA => SELF%IN_VARS_3D_REAL64(8)%PTR - SELF%PVFL => SELF%IN_VARS_3D_REAL64(9)%PTR - SELF%PVFI => SELF%IN_VARS_3D_REAL64(10)%PTR - SELF%PDYNA => SELF%IN_VARS_3D_REAL64(11)%PTR - SELF%PDYNL => SELF%IN_VARS_3D_REAL64(12)%PTR - SELF%PDYNI => SELF%IN_VARS_3D_REAL64(13)%PTR - SELF%PHRSW => SELF%IN_VARS_3D_REAL64(14)%PTR - SELF%PHRLW => SELF%IN_VARS_3D_REAL64(15)%PTR - SELF%PVERVEL => SELF%IN_VARS_3D_REAL64(16)%PTR - SELF%PAP => SELF%IN_VARS_3D_REAL64(17)%PTR - SELF%PLU => SELF%IN_VARS_3D_REAL64(18)%PTR - SELF%PLUDE => SELF%IN_VARS_3D_REAL64(19)%PTR - SELF%PSNDE => SELF%IN_VARS_3D_REAL64(20)%PTR - SELF%PMFU => SELF%IN_VARS_3D_REAL64(21)%PTR - SELF%PMFD => SELF%IN_VARS_3D_REAL64(22)%PTR - SELF%PA => SELF%IN_VARS_3D_REAL64(23)%PTR - SELF%PSUPSAT => SELF%IN_VARS_3D_REAL64(24)%PTR - - FIELD = FSET%FIELD("PLSM") - CALL FIELD%DATA(SELF%PLSM) - FIELD = FSET%FIELD("LDCUM") - CALL FIELD%DATA(SELF%LDCUM) - FIELD = FSET%FIELD("KTYPE") - CALL FIELD%DATA(SELF%KTYPE) - FIELD = FSET%FIELD("PAPH") - CALL FIELD%DATA(SELF%PAPH) - FIELD = FSET%FIELD("PEXTRA") - CALL FIELD%DATA(SELF%PEXTRA) - FIELD = FSET%FIELD("PCLV") - CALL FIELD%DATA(SELF%PCLV) - DO IVAR = 1, SIZE(IN_VAR_NAMES) CALL LOADVAR_ATLAS(FSET, TRIM(IN_VAR_NAMES(IVAR)), KLON, NGPTOTG) ENDDO FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_CML', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) - CALL FIELD%DATA(SELF%B_CML) CALL FSET%ADD(FIELD) FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_TMP', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) - CALL FIELD%DATA(SELF%B_TMP) CALL FSET%ADD(FIELD) FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) - CALL FIELD%DATA(SELF%B_LOC) CALL FSET%ADD(FIELD) ! The STATE_TYPE arrays are tricky, as the AOSOA layout needs to be expictly ! unrolled at every step, and we rely on dirty hackery to do this. - CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_CML', SELF%TENDENCY_CML, KLON, NGPTOTG) - CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_TMP', SELF%TENDENCY_TMP, KLON, NGPTOTG) - ALLOCATE(SELF%TENDENCY_LOC(SELF%NBLOCKS)) - !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) - DO B=1, SELF%NBLOCKS - SELF%TENDENCY_LOC(B)%T => SELF%B_LOC(:,:,1,B) - SELF%TENDENCY_LOC(B)%A => SELF%B_LOC(:,:,2,B) - SELF%TENDENCY_LOC(B)%Q => SELF%B_LOC(:,:,3,B) - SELF%TENDENCY_LOC(B)%CLD => SELF%B_LOC(:,:,4:,B) - END DO - !$OMP END PARALLEL DO - + CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_CML', KLON, NGPTOTG) + CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_TMP', KLON, NGPTOTG) ! Output fields are simply allocated and zero'd DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 2 CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) @@ -237,35 +285,23 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG) DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 1 FIELD = FSET%FIELD(TRIM(OUT_VAR_NAMES(IVAR))) - CALL FIELD%DATA(SELF%OUT_VARS_3D_REAL64(IVAR)%PTR) + CALL FIELD%DATA(TMP3D) !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) DO B=1, SELF%NBLOCKS - SELF%OUT_VARS_3D_REAL64(IVAR)%PTR(:,:,B) = 0.0_JPRB + TMP3D(:,:,B) = 0.0_JPRB END DO !$omp end parallel do ENDDO - - SELF%PFSQLF => SELF%OUT_VARS_3D_REAL64(1)%PTR - SELF%PFSQIF => SELF%OUT_VARS_3D_REAL64(2)%PTR - SELF%PFCQLNG => SELF%OUT_VARS_3D_REAL64(3)%PTR - SELF%PFCQNNG => SELF%OUT_VARS_3D_REAL64(4)%PTR - SELF%PFSQRF => SELF%OUT_VARS_3D_REAL64(5)%PTR - SELF%PFSQSF => SELF%OUT_VARS_3D_REAL64(6)%PTR - SELF%PFCQRNG => SELF%OUT_VARS_3D_REAL64(7)%PTR - SELF%PFCQSNG => SELF%OUT_VARS_3D_REAL64(8)%PTR - SELF%PFSQLTUR => SELF%OUT_VARS_3D_REAL64(9)%PTR - SELF%PFSQITUR => SELF%OUT_VARS_3D_REAL64(10)%PTR - SELF%PFPLSL => SELF%OUT_VARS_3D_REAL64(11)%PTR - SELF%PFPLSN => SELF%OUT_VARS_3D_REAL64(12)%PTR - SELF%PFHPSL => SELF%OUT_VARS_3D_REAL64(13)%PTR - SELF%PFHPSN => SELF%OUT_VARS_3D_REAL64(14)%PTR - SELF%PCOVPTOT => SELF%OUT_VARS_3D_REAL64(15)%PTR + ! DEBUG + !FIELD = FSET%FIELD("PAP") + !call field%data(tmp3d) + !print *, MINVAL(tmp3d), MAXVAL(tmp3d) FIELD = FSET%FIELD("PRAINFRAC_TOPRFZ") - CALL FIELD%DATA(SELF%PRAINFRAC_TOPRFZ) + CALL FIELD%DATA(TMP2D) !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) DO B=1, SELF%NBLOCKS - SELF%PRAINFRAC_TOPRFZ(:,B) = 0.0_JPRB + TMP2D(:,B) = 0.0_JPRB END DO !$OMP END PARALLEL DO @@ -279,12 +315,12 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG) CALL YREPHLI_LOAD_PARAMETERS() CALL INPUT_FINALIZE() - END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, NGPTOT, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOT, NGPTOTG) ! Validate the correctness of output against reference data CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER(KIND=JPIM), INTENT(IN) :: NGPTOT INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG @@ -292,6 +328,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, NGPTOT, NGPTOTG) CALL INPUT_INITIALIZE(NAME='reference') CALL LOAD_SCALAR('KLON', KLON) + print *, "KLON = ", KLON CALL INPUT_FINALIZE() ! Write variable validation header diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index 2504766c..1e2a7df2 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -28,6 +28,10 @@ PROGRAM DWARF_CLOUDSC INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) INTEGER(KIND=JPIM) :: NGPTOT ! Local number of grid points +REAL(c_double), pointer :: tmp3d(:,:,:) +type(atlas_fieldset) :: fset +type(atlas_field) :: field + TYPE(CLOUDSC_GLOBAL_ATLAS_STATE) :: GLOBAL_ATLAS_STATE INTEGER(KIND=JPIB) :: ENERGY, POWER @@ -69,33 +73,20 @@ PROGRAM DWARF_CLOUDSC READ(CLARG(1:LENARG),*) NPROMA ENDIF -! TODO: Create a global global memory state from serialized input data -CALL GLOBAL_ATLAS_STATE%LOAD(NPROMA, NGPTOT, NGPTOTG) +FSET = ATLAS_FIELDSET() + +! TODO: Create a global memory state from serialized input data +CALL GLOBAL_ATLAS_STATE%LOAD(FSET, NPROMA, NGPTOT, NGPTOTG) + +!FIELD = FSET%FIELD("PAP") +!call field%data(tmp3d) +!print *, MINVAL(tmp3d), MAXVAL(tmp3d) ! Call the driver to perform the parallel loop over our kernel -CALL CLOUDSC_DRIVER(NUMOMP, NPROMA, GLOBAL_ATLAS_STATE%KLEV, NGPTOT, NGPTOTG, & - & GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY, & - & GLOBAL_ATLAS_STATE%PT, GLOBAL_ATLAS_STATE%PQ, & - & GLOBAL_ATLAS_STATE%TENDENCY_CML, GLOBAL_ATLAS_STATE%TENDENCY_TMP, GLOBAL_ATLAS_STATE%TENDENCY_LOC, & - & GLOBAL_ATLAS_STATE%PVFA, GLOBAL_ATLAS_STATE%PVFL, GLOBAL_ATLAS_STATE%PVFI, & - & GLOBAL_ATLAS_STATE%PDYNA, GLOBAL_ATLAS_STATE%PDYNL, GLOBAL_ATLAS_STATE%PDYNI, & - & GLOBAL_ATLAS_STATE%PHRSW, GLOBAL_ATLAS_STATE%PHRLW, & - & GLOBAL_ATLAS_STATE%PVERVEL, GLOBAL_ATLAS_STATE%PAP, GLOBAL_ATLAS_STATE%PAPH, & - & GLOBAL_ATLAS_STATE%PLSM, GLOBAL_ATLAS_STATE%LDCUM, GLOBAL_ATLAS_STATE%KTYPE, & - & GLOBAL_ATLAS_STATE%PLU, GLOBAL_ATLAS_STATE%PLUDE, GLOBAL_ATLAS_STATE%PSNDE, & - & GLOBAL_ATLAS_STATE%PMFU, GLOBAL_ATLAS_STATE%PMFD, & - & GLOBAL_ATLAS_STATE%PA, GLOBAL_ATLAS_STATE%PCLV, GLOBAL_ATLAS_STATE%PSUPSAT,& - & GLOBAL_ATLAS_STATE%PLCRIT_AER, GLOBAL_ATLAS_STATE%PICRIT_AER, GLOBAL_ATLAS_STATE%PRE_ICE, & - & GLOBAL_ATLAS_STATE%PCCN, GLOBAL_ATLAS_STATE%PNICE,& - & GLOBAL_ATLAS_STATE%PCOVPTOT, GLOBAL_ATLAS_STATE%PRAINFRAC_TOPRFZ, & - & GLOBAL_ATLAS_STATE%PFSQLF, GLOBAL_ATLAS_STATE%PFSQIF , GLOBAL_ATLAS_STATE%PFCQNNG, GLOBAL_ATLAS_STATE%PFCQLNG, & - & GLOBAL_ATLAS_STATE%PFSQRF, GLOBAL_ATLAS_STATE%PFSQSF , GLOBAL_ATLAS_STATE%PFCQRNG, GLOBAL_ATLAS_STATE%PFCQSNG, & - & GLOBAL_ATLAS_STATE%PFSQLTUR, GLOBAL_ATLAS_STATE%PFSQITUR, & - & GLOBAL_ATLAS_STATE%PFPLSL, GLOBAL_ATLAS_STATE%PFPLSN, GLOBAL_ATLAS_STATE%PFHPSL, GLOBAL_ATLAS_STATE%PFHPSN & - & ) +CALL CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY) ! Validate the output against serialized reference data -!CALL GLOBAL_ATLAS_STATE%VALIDATE(NGPTOT, NGPTOTG) +CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, NGPTOT, NGPTOTG) CALL ATLAS_LIBRARY%FINALISE() diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index 1037f9c1..edcf8c1b 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -98,15 +98,13 @@ subroutine loadvar_atlas(fset, name, nlon, ngptotg) endif end subroutine loadvar_atlas - subroutine loadstate_atlas(fset, name, state, nlon, ngptotg) + subroutine loadstate_atlas(fset, name, nlon, ngptotg) ! Load into the local memory buffer and expand to global field type(atlas_fieldset), intent(inout) :: fset character(len=*) :: name - type(state_type), allocatable, intent(inout) :: state(:) integer(kind=jpim), intent(in) :: nlon integer(kind=jpim), intent(in), optional :: ngptotg - integer :: b integer(kind=jpim) :: start, end, size, nlev, nproma, ngptot, nblocks, ndim type(atlas_field) :: field type(atlas_functionspace_blockstructuredcolumns) :: fspace @@ -124,7 +122,6 @@ subroutine loadstate_atlas(fset, name, state, nlon, ngptotg) call get_offsets(start, end, size, nlon, ndim, nlev, ngptot, ngptotg) allocate(buffer(size, nlev, 3+ndim)) - if (.not. allocated(state)) allocate(state(nblocks)) call field%data(field_r3) call load_array(name//'_T', start, end, size, nlon, nlev, buffer(:,:,1)) @@ -137,15 +134,6 @@ subroutine loadstate_atlas(fset, name, state, nlon, ngptotg) call expand(buffer(:,:,3), field_r3(:,:,3,:), size, nproma, nlev, ngptot, nblocks) call expand(buffer(:,:,4:), field_r3(:,:,4:,:), size, nproma, nlev, ndim, ngptot, nblocks) deallocate(buffer) - -!$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) schedule(runtime) - do b=1, nblocks - state(b)%t => field_r3(:,:,1,b) - state(b)%a => field_r3(:,:,2,b) - state(b)%q => field_r3(:,:,3,b) - state(b)%cld => field_r3(:,:,4:3+ndim,b) - end do -!$OMP end parallel do end subroutine loadstate_atlas end module expand_atlas_mod diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 32365859..1b1b43f1 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -141,7 +141,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) DO B=1, NBLOCKS - BSIZE = MIN(NLON, NGPTOT - (B-1)*NLON) ! Field block size + BSIZE = FSPACE%BLOCK_SIZE(B) ZMINVAL(1) = MIN(ZMINVAL(1),MINVAL(FIELD_R3(:,:,4:,B))) ZMAX_VAL_ERR(1) = MAX(ZMAX_VAL_ERR(1),MAXVAL(FIELD_R3(:,:,4:,B))) DO JM=1, NDIM From 9d2c328b2e19d78364165fe6f1d4c1c5a02c388a Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Tue, 9 May 2023 10:01:02 +0000 Subject: [PATCH 029/174] Python-f2py: Small fixes to CI runner and CMake setup --- .github/workflows/build.yml | 6 +++--- src/cloudsc_python/CMakeLists.txt | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fd03258e..5bca7bd0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ on: jobs: # This workflow contains a single job called "build" build: - name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} ${{ matrix.pyiface_flag }} ${{ matrix.python_flag }} + name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} # The type of runner that the job will run on runs-on: ubuntu-20.04 @@ -124,7 +124,7 @@ jobs: --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \ ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \ ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} \ - ${{ matrix.pyiface_flag }} ${{ matrix.python_flag }} + ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} # Verify targets exist - name: Verify targets @@ -136,7 +136,7 @@ jobs: loki_flag: ${{ matrix.loki_flag }} claw_flag: ${{ matrix.claw_flag }} pyiface_flag: ${{ matrix.pyiface_flag }} - python_flag: ${{ matrix.python_flag }} + python_flag: ${{ matrix.python_f2py_flag }} run: .github/scripts/verify-targets.sh # Run double-precision targets diff --git a/src/cloudsc_python/CMakeLists.txt b/src/cloudsc_python/CMakeLists.txt index 7e0ffdc1..8845827b 100644 --- a/src/cloudsc_python/CMakeLists.txt +++ b/src/cloudsc_python/CMakeLists.txt @@ -21,16 +21,16 @@ if( HAVE_CLOUDSC_PYTHON_F2PY ) set( cloudsc_VENV_PATH ${CMAKE_BINARY_DIR}/venv_cloudsc ) setup_python_venv( ${cloudsc_VENV_PATH} ) - # Update to latest pip versionxs - execute_process(COMMAND ${Python3_EXECUTABLE} -m pip install --upgrade pip) - if( NOT Python3_EXECUTABLE ) ecbuild_error("[PyIface] Could not find Python3 executable in virtualenv") endif() + # Update to latest pip versionxs + execute_process(COMMAND ${Python3_EXECUTABLE} -m pip install --upgrade pip) + # Install the "cloudscf2py" Python package and runner in editable mode add_custom_command( OUTPUT ${Python3_VENV_BIN}/cloudsc_f2py.py - COMMAND COMMAND ${Python3_EXECUTABLE} -m pip install -e ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${Python3_EXECUTABLE} -m pip install -e ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "[CLOUDSC-Python] Installing cloudscf2py into virtualenv [${cloudsc_VENV_PATH}]" ) From a2545c7712e2667b777f290066865e687f0e1bf9 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Tue, 9 May 2023 10:08:27 +0000 Subject: [PATCH 030/174] Python-f2py: Remove defunct --nthreads runner options --- src/cloudsc_python/drivers/cloudsc_f2py.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/cloudsc_python/drivers/cloudsc_f2py.py b/src/cloudsc_python/drivers/cloudsc_f2py.py index f9e5a084..a55c396b 100644 --- a/src/cloudsc_python/drivers/cloudsc_f2py.py +++ b/src/cloudsc_python/drivers/cloudsc_f2py.py @@ -76,7 +76,7 @@ def cloudsc_validate(fields, ref_fields, kidia, kfdia): ) -def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): +def run_cloudsc_kernel(ngptot, nproma, input_path, reference_path): from cloudscf2py import ( load_input_fields, load_input_parameters, load_reference_fields, cloudsc_py @@ -102,10 +102,6 @@ def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): @click.command() -@click.option( - '--nthreads', default=1, - help='Number of OpenMP threads to use' -) @click.option( '--ngptot', default=100, help='Total number of columns to use for benchamrking' @@ -118,7 +114,7 @@ def run_cloudsc_kernel(nthreads, ngptot, nproma, input_path, reference_path): '--generate/--no-generate', default=False, help='(Re)generate kernel via Loki-Fortran-Python transform' ) -def main(nthreads, ngptot, nproma, generate): +def main(ngptot, nproma, generate): """ Run a Python version of CLOUDSC and validate against reference data """ @@ -136,7 +132,7 @@ def main(nthreads, ngptot, nproma, generate): ) run_cloudsc_kernel( - nthreads, ngptot, nproma, input_path=input_path, reference_path=reference_path + ngptot, nproma, input_path=input_path, reference_path=reference_path ) From d24f63fb8811ebc58600fd7c7d5b4eefb542a769 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 9 May 2023 15:15:24 +0200 Subject: [PATCH 031/174] FieldSet is not thread-safe -> unpack fields prior the cloudsc loop (tnx Willem) --- .../cloudsc_driver_mod.F90 | 66 +---- .../cloudsc_global_atlas_state_mod.F90 | 260 ++++++++++++------ 2 files changed, 189 insertions(+), 137 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index 8a84c043..d173c870 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -14,7 +14,7 @@ MODULE CLOUDSC_DRIVER_MOD USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM USE EC_PMON_MOD, ONLY: EC_PMON - USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW + USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW, CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS USE ATLAS_MODULE USE, INTRINSIC :: ISO_C_BINDING @@ -33,60 +33,11 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) INTEGER(KIND=JPIM), INTENT(IN) :: NUMOMP, NGPTOT, NGPTOTG, KFLDX REAL(KIND=JPRB), INTENT(IN) :: PTSPHY ! Physics timestep + TYPE(CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS) :: SFIELDS TYPE(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW) :: FBLOCK TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD INTEGER(KIND=JPIM) :: NPROMA, NLEV - REAL(KIND=JPRB), POINTER :: PT(:,:) ! T at start of callpar - REAL(KIND=JPRB), POINTER :: PQ(:,:) ! Q at start of callpar - TYPE(STATE_TYPE), POINTER :: TENDENCY_CML(:) ! cumulative tendency used for final output - TYPE(STATE_TYPE), POINTER :: TENDENCY_TMP(:) ! cumulative tendency used as input - TYPE(STATE_TYPE), POINTER :: TENDENCY_LOC(:) ! local tendency from cloud scheme - REAL(KIND=JPRB), POINTER:: PVFA(:,:) ! CC from VDF scheme - REAL(KIND=JPRB), POINTER:: PVFL(:,:) ! Liq from VDF scheme - REAL(KIND=JPRB), POINTER:: PVFI(:,:) ! Ice from VDF scheme - REAL(KIND=JPRB), POINTER:: PDYNA(:,:) ! CC from Dynamics - REAL(KIND=JPRB), POINTER:: PDYNL(:,:) ! Liq from Dynamics - REAL(KIND=JPRB), POINTER:: PDYNI(:,:) ! Liq from Dynamics - REAL(KIND=JPRB), POINTER:: PHRSW(:,:) ! Short-wave heating rate - REAL(KIND=JPRB), POINTER:: PHRLW(:,:) ! Long-wave heating rate - REAL(KIND=JPRB), POINTER:: PVERVEL(:,:) !Vertical velocity - REAL(KIND=JPRB), POINTER:: PAP(:,:) ! Pressure on full levels - REAL(KIND=JPRB), POINTER:: PAPH(:,:) ! Pressure on half levels - REAL(KIND=JPRB), POINTER:: PLSM(:) ! Land fraction (0-1) - LOGICAL, POINTER :: LDCUM(:) ! Convection active - INTEGER(KIND=JPIM), POINTER :: KTYPE(:) ! Convection type 0,1,2 - REAL(KIND=JPRB), POINTER:: PLU(:,:) ! Conv. condensate - REAL(KIND=JPRB), POINTER:: PLUDE(:,:) ! Conv. detrained water - REAL(KIND=JPRB), POINTER:: PSNDE(:,:) ! Conv. detrained snow - REAL(KIND=JPRB), POINTER:: PMFU(:,:) ! Conv. mass flux up - REAL(KIND=JPRB), POINTER:: PMFD(:,:) ! Conv. mass flux down - REAL(KIND=JPRB), POINTER:: PA(:,:) ! Original Cloud fraction (t) - REAL(KIND=JPRB), POINTER:: PCLV(:,:,:) - REAL(KIND=JPRB), POINTER:: PSUPSAT(:,:) - REAL(KIND=JPRB), POINTER:: PLCRIT_AER(:,:) - REAL(KIND=JPRB), POINTER:: PICRIT_AER(:,:) - REAL(KIND=JPRB), POINTER:: PRE_ICE(:,:) - REAL(KIND=JPRB), POINTER:: PCCN(:,:) ! liquid cloud condensation nuclei - REAL(KIND=JPRB), POINTER:: PNICE(:,:) ! ice number concentration (cf. CCN) - - REAL(KIND=JPRB), POINTER:: PCOVPTOT(:,:) ! Precip fraction - REAL(KIND=JPRB), POINTER:: PRAINFRAC_TOPRFZ(:) - ! Flux diagnostics for DDH budget - REAL(KIND=JPRB), POINTER :: PFSQLF(:,:) ! Flux of liquid - REAL(KIND=JPRB), POINTER :: PFSQIF(:,:) ! Flux of ice - REAL(KIND=JPRB), POINTER :: PFCQLNG(:,:) ! -ve corr for liq - REAL(KIND=JPRB), POINTER :: PFCQNNG(:,:) ! -ve corr for ice - REAL(KIND=JPRB), POINTER :: PFSQRF(:,:) ! Flux diagnostics - REAL(KIND=JPRB), POINTER :: PFSQSF(:,:) ! for DDH, generic - REAL(KIND=JPRB), POINTER :: PFCQRNG(:,:) ! rain - REAL(KIND=JPRB), POINTER :: PFCQSNG(:,:) ! snow - REAL(KIND=JPRB), POINTER :: PFSQLTUR(:,:) ! liquid flux due to VDF - REAL(KIND=JPRB), POINTER :: PFSQITUR(:,:) ! ice flux due to VDF - REAL(KIND=JPRB), POINTER :: PFPLSL(:,:) ! liq+rain sedim flux - REAL(KIND=JPRB), POINTER :: PFPLSN(:,:) ! ice+snow sedim flux - REAL(KIND=JPRB), POINTER :: PFHPSL(:,:) ! Enthalpy flux for liq - REAL(KIND=JPRB), POINTER :: PFHPSN(:,:) ! Enthalp flux for ice INTEGER(KIND=JPIM) :: JKGLO,IBL,ICEND,NGPBLKS @@ -107,18 +58,20 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) FSPACE = FIELD%FUNCTIONSPACE() NPROMA = FSPACE%BLOCK_SIZE(1) NLEV = FSPACE%LEVELS() - print *, "NPROMA, NUMOMP ", NPROMA, NUMOMP NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) -1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) +1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGTOT=', i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) if (irank == 0) then - write(0,1003) NUMPROC,NUMOMP,NGPTOTG,NPROMA,NGPBLKS + write(0,1003) NUMPROC,NUMOMP,NGPTOT, NGPTOTG,NPROMA,NGPBLKS end if ! Global timer for the parallel region CALL TIMER%START(NUMOMP) - !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) & + + CALL SFIELDS%SETUP(FSET) + + !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power,FBLOCK) & !$omp& num_threads(NUMOMP) ! Local timer for each thread @@ -131,7 +84,8 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) ICEND=MIN(NPROMA,NGPTOT-JKGLO+1) ! get block views - call FBLOCK%GET_BLOCK(FSET, IBL) + call FBLOCK%GET_BLOCK(SFIELDS, IBL) + CONTINUE !-- These were uninitialized : meaningful only when we compare error differences FBLOCK%PCOVPTOT(:,:) = 0.0_JPRB diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 713e833d..ba18ac74 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -34,6 +34,9 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(C_DOUBLE), POINTER :: PTR(:,:) END TYPE + !INTEGER, PARAMETER PLCRIT_AER = 1 + !INTEGER, PARAMETER PLCRIT_AER = 2 + !IN_VAR_NAMES(PLCRIT_AER) CHARACTER(LEN=10), PARAMETER, DIMENSION(30) :: IN_VAR_NAMES = (/ & "PLCRIT_AER", "PICRIT_AER", "PRE_ICE ", "PCCN ", "PNICE ", "PT ", "PQ ", & "PVFA ", "PVFL ", "PVFI ", "PDYNA ", "PDYNL ", "PDYNI ", "PHRSW ", & @@ -83,9 +86,9 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PEXTRA(:,:,:) ! extra fields REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCLV(:,:,:) - TYPE(STATE_TYPE) :: TENDENCY_CML ! cumulative tendency used for final output - TYPE(STATE_TYPE) :: TENDENCY_TMP ! cumulative tendency used as input - TYPE(STATE_TYPE) :: TENDENCY_LOC ! local tendency from cloud scheme + TYPE(STATE_TYPE) :: TENDENCY_CML ! cumulative tendency used for final output + TYPE(STATE_TYPE) :: TENDENCY_TMP ! cumulative tendency used as input + TYPE(STATE_TYPE) :: TENDENCY_LOC ! local tendency from cloud scheme ! Output fields used for validation REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLF(:,:) ! Flux of liquid @@ -104,10 +107,72 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSN(:,:) ! Enthalpy flux for ice REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCOVPTOT(:,:) ! Precip fraction REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRAINFRAC_TOPRFZ(:) - CONTAINS + + CONTAINS PROCEDURE :: GET_BLOCK => CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK END TYPE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW + TYPE CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS + !TYPE(atlas_field), allocatlabe :: fields(:) + + ! Input field variables and tendencies + TYPE(atlas_field) :: fPLCRIT_AER + TYPE(atlas_field) :: fPICRIT_AER + TYPE(atlas_field) :: fPRE_ICE + TYPE(atlas_field) :: fPCCN ! liquid cloud condensation nuclei + TYPE(atlas_field) :: fPNICE ! ice number concentration (cf. CCN) + TYPE(atlas_field) :: fPT ! T at start of callpar + TYPE(atlas_field) :: fPQ ! Q at start of callpar + TYPE(atlas_field) :: fPVFA ! CC from VDF scheme + TYPE(atlas_field) :: fPVFL ! Liq from VDF scheme + TYPE(atlas_field) :: fPVFI ! Ice from VDF scheme + TYPE(atlas_field) :: fPDYNA ! CC from Dynamics + TYPE(atlas_field) :: fPDYNL ! Liq from Dynamics + TYPE(atlas_field) :: fPDYNI ! Liq from Dynamics + TYPE(atlas_field) :: fPHRSW ! Short-wave heating rate + TYPE(atlas_field) :: fPHRLW ! Long-wave heating rate + TYPE(atlas_field) :: fPVERVEL ! Vertical velocity + TYPE(atlas_field) :: fPAP ! Pressure on full levels + TYPE(atlas_field) :: fPLU ! Conv. condensate + TYPE(atlas_field) :: fPLUDE ! Conv. detrained water + TYPE(atlas_field) :: fPSNDE ! Conv. detrained snow + TYPE(atlas_field) :: fPMFU ! Conv. mass flux up + TYPE(atlas_field) :: fPMFD ! Conv. mass flux down + TYPE(atlas_field) :: fPA ! Original Cloud fraction (t) + TYPE(atlas_field) :: fPSUPSAT + + TYPE(atlas_field) :: fPLSM ! Land fraction (0-1) + TYPE(atlas_field) :: fLDCUM ! Convection active + TYPE(atlas_field) :: fKTYPE ! Convection type 0,1,2 + TYPE(atlas_field) :: fPAPH ! Pressure on half levels + TYPE(atlas_field) :: fPEXTRA ! extra fields + TYPE(atlas_field) :: fPCLV + + TYPE(atlas_field) :: fTENDENCY_CML ! cumulative tendency used for final output + TYPE(atlas_field) :: fTENDENCY_TMP ! cumulative tendency used as input + TYPE(atlas_field) :: fTENDENCY_LOC ! local tendency from cloud scheme + + ! Output fields used for validation + TYPE(atlas_field) :: fPFSQLF ! Flux of liquid + TYPE(atlas_field) :: fPFSQIF ! Flux of ice + TYPE(atlas_field) :: fPFCQLNG ! -ve corr for liq + TYPE(atlas_field) :: fPFCQNNG ! -ve corr for ice + TYPE(atlas_field) :: fPFSQRF ! Flux diagnostics + TYPE(atlas_field) :: fPFSQSF ! for DDH, generic + TYPE(atlas_field) :: fPFCQRNG ! rain + TYPE(atlas_field) :: fPFCQSNG ! snow + TYPE(atlas_field) :: fPFSQLTUR ! liquid flux due to VDF + TYPE(atlas_field) :: fPFSQITUR ! ice flux due to VDF + TYPE(atlas_field) :: fPFPLSL ! liq+rain sedim flux + TYPE(atlas_field) :: fPFPLSN ! ice+snow sedim flux + TYPE(atlas_field) :: fPFHPSL ! Enthalpy flux for liq + TYPE(atlas_field) :: fPFHPSN ! Enthalpy flux for ice + TYPE(atlas_field) :: fPCOVPTOT ! Precip fraction + TYPE(atlas_field) :: fPRAINFRAC_TOPRFZ + CONTAINS + PROCEDURE :: SETUP => CLOUDSC_GLOBAL_ATLAS_SETUP_BLOCK + END TYPE CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS + TYPE CLOUDSC_GLOBAL_ATLAS_STATE ! Memory state containing raw fields annd tendencies for CLOUDSC dwarf ! @@ -127,101 +192,134 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD CONTAINS - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) - ! Load reference input data via serialbox - CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW), INTENT(INOUT) :: SELF + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_SETUP_BLOCK(SELF, FSET) + CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS), INTENT(INOUT) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + + SELF%fPLCRIT_AER = FSET%FIELD("PLCRIT_AER") + SELF%fPICRIT_AER = FSET%FIELD("PICRIT_AER") + SELF%fPRE_ICE = FSET%FIELD("PRE_ICE") + SELF%fPCCN = FSET%FIELD("PCCN") + SELF%fPNICE = FSET%FIELD("PNICE") + SELF%fPT = FSET%FIELD("PT") + SELF%fPQ = FSET%FIELD("PQ") + SELF%fPVFA = FSET%FIELD("PVFA") + SELF%fPVFL = FSET%FIELD("PVFL") + SELF%fPVFI = FSET%FIELD("PVFI") + SELF%fPDYNA = FSET%FIELD("PDYNA") + SELF%fPDYNL = FSET%FIELD("PDYNL") + SELF%fPDYNI = FSET%FIELD("PDYNI") + SELF%fPHRSW = FSET%FIELD("PHRSW") + SELF%fPHRLW = FSET%FIELD("PHRLW") + SELF%fPVERVEL = FSET%FIELD("PVERVEL") + SELF%fPAP = FSET%FIELD("PAP") + SELF%fPLU = FSET%FIELD("PLU") + SELF%fPLUDE = FSET%FIELD("PLUDE") + SELF%fPSNDE = FSET%FIELD("PSNDE") + SELF%fPMFU = FSET%FIELD("PMFU") + SELF%fPMFD = FSET%FIELD("PMFD") + SELF%fPA = FSET%FIELD("PA") + SELF%fPSUPSAT = FSET%FIELD("PSUPSAT") + SELF%fPLSM = FSET%FIELD("PLSM") + SELF%fLDCUM = FSET%FIELD("LDCUM") + SELF%fKTYPE = FSET%FIELD("KTYPE") + SELF%fPAPH = FSET%FIELD("PAPH") + SELF%fPEXTRA = FSET%FIELD("PEXTRA") + SELF%fPCLV = FSET%FIELD("PCLV") + + SELF%fTENDENCY_CML = FSET%FIELD('TENDENCY_CML') + SELF%fTENDENCY_TMP = FSET%FIELD('TENDENCY_TMP') + SELF%fTENDENCY_LOC = FSET%FIELD('TENDENCY_LOC') + + SELF%fPFSQLF = FSET%FIELD("PFSQLF") + SELF%fPFSQIF = FSET%FIELD("PFSQIF") + SELF%fPFCQLNG = FSET%FIELD("PFCQLNG") + SELF%fPFCQNNG = FSET%FIELD("PFCQNNG") + SELF%fPFSQRF = FSET%FIELD("PFSQRF") + SELF%fPFSQSF = FSET%FIELD("PFSQSF") + SELF%fPFCQRNG = FSET%FIELD("PFCQRNG") + SELF%fPFCQSNG = FSET%FIELD("PFCQSNG") + SELF%fPFSQLTUR = FSET%FIELD("PFSQLTUR") + SELF%fPFSQITUR = FSET%FIELD("PFSQITUR") + SELF%fPFPLSL = FSET%FIELD("PFPLSL") + SELF%fPFPLSN = FSET%FIELD("PFPLSN") + SELF%fPFHPSL = FSET%FIELD("PFHPSL") + SELF%fPFHPSN = FSET%FIELD("PFHPSN") + SELF%fPCOVPTOT = FSET%FIELD("PCOVPTOT") + SELF%fPRAINFRAC_TOPRFZ = FSET%FIELD("PRAINFRAC_TOPRFZ") + END SUBROUTINE + + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FIELDS, IBLK) + CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW), INTENT(INOUT) :: SELF + CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS), INTENT(INOUT) :: FIELDS INTEGER, INTENT(IN) :: IBLK - TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE - INTEGER(KIND=JPIM) :: KLON, IVAR, B - TYPE(ATLAS_FIELD) :: FIELD - REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:), TMP2D(:,:) + REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) ! NOTE the last six input variables need special treatment - different types - DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 - FIELD = FSET%FIELD(TRIM(IN_VAR_NAMES(IVAR))) - CALL FIELD%DATA(SELF%IN_VARS_2D_REAL64(IVAR)%PTR, IBLK) - ENDDO - SELF%PLCRIT_AER => SELF%IN_VARS_2D_REAL64(1)%PTR - SELF%PICRIT_AER => SELF%IN_VARS_2D_REAL64(2)%PTR - SELF%PRE_ICE => SELF%IN_VARS_2D_REAL64(3)%PTR - SELF%PCCN => SELF%IN_VARS_2D_REAL64(4)%PTR - SELF%PNICE => SELF%IN_VARS_2D_REAL64(5)%PTR - SELF%PT => SELF%IN_VARS_2D_REAL64(6)%PTR - SELF%PQ => SELF%IN_VARS_2D_REAL64(7)%PTR - SELF%PVFA => SELF%IN_VARS_2D_REAL64(8)%PTR - SELF%PVFL => SELF%IN_VARS_2D_REAL64(9)%PTR - SELF%PVFI => SELF%IN_VARS_2D_REAL64(10)%PTR - SELF%PDYNA => SELF%IN_VARS_2D_REAL64(11)%PTR - SELF%PDYNL => SELF%IN_VARS_2D_REAL64(12)%PTR - SELF%PDYNI => SELF%IN_VARS_2D_REAL64(13)%PTR - SELF%PHRSW => SELF%IN_VARS_2D_REAL64(14)%PTR - SELF%PHRLW => SELF%IN_VARS_2D_REAL64(15)%PTR - SELF%PVERVEL => SELF%IN_VARS_2D_REAL64(16)%PTR - SELF%PAP => SELF%IN_VARS_2D_REAL64(17)%PTR - SELF%PLU => SELF%IN_VARS_2D_REAL64(18)%PTR - SELF%PLUDE => SELF%IN_VARS_2D_REAL64(19)%PTR - SELF%PSNDE => SELF%IN_VARS_2D_REAL64(20)%PTR - SELF%PMFU => SELF%IN_VARS_2D_REAL64(21)%PTR - SELF%PMFD => SELF%IN_VARS_2D_REAL64(22)%PTR - SELF%PA => SELF%IN_VARS_2D_REAL64(23)%PTR - SELF%PSUPSAT => SELF%IN_VARS_2D_REAL64(24)%PTR - - FIELD = FSET%FIELD("PLSM") - CALL FIELD%DATA(SELF%PLSM, IBLK) - FIELD = FSET%FIELD("LDCUM") - CALL FIELD%DATA(SELF%LDCUM, IBLK) - FIELD = FSET%FIELD("KTYPE") - CALL FIELD%DATA(SELF%KTYPE, IBLK) - FIELD = FSET%FIELD("PAPH") - CALL FIELD%DATA(SELF%PAPH, IBLK) - FIELD = FSET%FIELD("PEXTRA") - CALL FIELD%DATA(SELF%PEXTRA, IBLK) - FIELD = FSET%FIELD("PCLV") - CALL FIELD%DATA(SELF%PCLV, IBLK) - - FIELD = FSET%FIELD('TENDENCY_CML') - CALL FIELD%DATA(TMP3D, IBLK) + CALL FIELDS%fPLCRIT_AER%DATA(SELF%PLCRIT_AER, IBLK) + CALL FIELDS%fPICRIT_AER%DATA(SELF%PICRIT_AER, IBLK) + CALL FIELDS%fPRE_ICE%DATA(SELF%PRE_ICE, IBLK) + CALL FIELDS%fPCCN%DATA(SELF%PCCN, IBLK) + CALL FIELDS%fPNICE%DATA(SELF%PNICE, IBLK) + CALL FIELDS%fPT%DATA(SELF%PT, IBLK) + CALL FIELDS%fPQ%DATA(SELF%PQ, IBLK) + CALL FIELDS%fPVFA%DATA(SELF%PVFA, IBLK) + CALL FIELDS%fPVFL%DATA(SELF%PVFL, IBLK) + CALL FIELDS%fPVFI%DATA(SELF%PVFI, IBLK) + CALL FIELDS%fPDYNA%DATA(SELF%PDYNA, IBLK) + CALL FIELDS%fPDYNL%DATA(SELF%PDYNL, IBLK) + CALL FIELDS%fPDYNI%DATA(SELF%PDYNI, IBLK) + CALL FIELDS%fPHRSW%DATA(SELF%PHRSW, IBLK) + CALL FIELDS%fPHRLW%DATA(SELF%PHRLW, IBLK) + CALL FIELDS%fPVERVEL%DATA(SELF%PVERVEL, IBLK) + CALL FIELDS%fPAP%DATA(SELF%PAP, IBLK) + CALL FIELDS%fPLU%DATA(SELF%PLU, IBLK) + CALL FIELDS%fPLUDE%DATA(SELF%PLUDE, IBLK) + CALL FIELDS%fPSNDE%DATA(SELF%PSNDE, IBLK) + CALL FIELDS%fPMFU%DATA(SELF%PMFU, IBLK) + CALL FIELDS%fPMFD%DATA(SELF%PMFD, IBLK) + CALL FIELDS%fPA%DATA(SELF%PA, IBLK) + CALL FIELDS%fPSUPSAT%DATA(SELF%PSUPSAT, IBLK) + CALL FIELDS%fPLSM%DATA(SELF%PLSM, IBLK) + CALL FIELDS%fLDCUM%DATA(SELF%LDCUM, IBLK) + CALL FIELDS%fKTYPE%DATA(SELF%KTYPE, IBLK) + CALL FIELDS%fPAPH%DATA(SELF%PAPH, IBLK) + CALL FIELDS%fPEXTRA%DATA(SELF%PEXTRA, IBLK) + CALL FIELDS%fPCLV%DATA(SELF%PCLV, IBLK) + + CALL FIELDS%fTENDENCY_CML%DATA(TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - FIELD = FSET%FIELD('TENDENCY_TMP') - CALL FIELD%DATA(TMP3D, IBLK) + CALL FIELDS%fTENDENCY_TMP%DATA(TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - FIELD = FSET%FIELD('TENDENCY_LOC') - CALL FIELD%DATA(TMP3D, IBLK) + CALL FIELDS%fTENDENCY_LOC%DATA(TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - DO IVAR = 1, SIZE(SELF%OUT_VARS_2D_REAL64) - FIELD = FSET%FIELD(TRIM(OUT_VAR_NAMES(IVAR))) - CALL FIELD%DATA(SELF%OUT_VARS_2D_REAL64(IVAR)%PTR, IBLK) - ENDDO - SELF%PFSQLF => SELF%OUT_VARS_2D_REAL64(1)%PTR - SELF%PFSQIF => SELF%OUT_VARS_2D_REAL64(2)%PTR - SELF%PFCQLNG => SELF%OUT_VARS_2D_REAL64(3)%PTR - SELF%PFCQNNG => SELF%OUT_VARS_2D_REAL64(4)%PTR - SELF%PFSQRF => SELF%OUT_VARS_2D_REAL64(5)%PTR - SELF%PFSQSF => SELF%OUT_VARS_2D_REAL64(6)%PTR - SELF%PFCQRNG => SELF%OUT_VARS_2D_REAL64(7)%PTR - SELF%PFCQSNG => SELF%OUT_VARS_2D_REAL64(8)%PTR - SELF%PFSQLTUR => SELF%OUT_VARS_2D_REAL64(9)%PTR - SELF%PFSQITUR => SELF%OUT_VARS_2D_REAL64(10)%PTR - SELF%PFPLSL => SELF%OUT_VARS_2D_REAL64(11)%PTR - SELF%PFPLSN => SELF%OUT_VARS_2D_REAL64(12)%PTR - SELF%PFHPSL => SELF%OUT_VARS_2D_REAL64(13)%PTR - SELF%PFHPSN => SELF%OUT_VARS_2D_REAL64(14)%PTR - SELF%PCOVPTOT => SELF%OUT_VARS_2D_REAL64(15)%PTR - - FIELD = FSET%FIELD('PRAINFRAC_TOPRFZ') - CALL FIELD%DATA(SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FIELDS%fPFSQLF%DATA(SELF%PFSQLF, IBLK) + CALL FIELDS%fPFSQIF%DATA(SELF%PFSQIF, IBLK) + CALL FIELDS%fPFCQLNG%DATA(SELF%PFCQLNG, IBLK) + CALL FIELDS%fPFCQNNG%DATA(SELF%PFCQNNG, IBLK) + CALL FIELDS%fPFSQRF%DATA(SELF%PFSQRF, IBLK) + CALL FIELDS%fPFSQSF%DATA(SELF%PFSQSF, IBLK) + CALL FIELDS%fPFCQRNG%DATA(SELF%PFCQRNG, IBLK) + CALL FIELDS%fPFCQSNG%DATA(SELF%PFCQSNG, IBLK) + CALL FIELDS%fPFSQLTUR%DATA(SELF%PFSQLTUR, IBLK) + CALL FIELDS%fPFSQITUR%DATA(SELF%PFSQITUR, IBLK) + CALL FIELDS%fPFPLSL%DATA(SELF%PFPLSL, IBLK) + CALL FIELDS%fPFPLSN%DATA(SELF%PFPLSN, IBLK) + CALL FIELDS%fPFHPSL%DATA(SELF%PFHPSL, IBLK) + CALL FIELDS%fPFHPSN%DATA(SELF%PFHPSN, IBLK) + CALL FIELDS%fPCOVPTOT%DATA(SELF%PCOVPTOT, IBLK) + CALL FIELDS%fPRAINFRAC_TOPRFZ%DATA(SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) From a0248615e1e36f0e314293f806603e248399ed96 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 10 May 2023 08:52:41 +0000 Subject: [PATCH 032/174] Python-f2py: Add expand_field routine to enable ngptot > klon --- src/cloudsc_python/drivers/cloudsc_f2py.py | 10 +-- src/cloudsc_python/src/cloudscf2py/inputs.py | 67 +++++++++++++------- 2 files changed, 48 insertions(+), 29 deletions(-) diff --git a/src/cloudsc_python/drivers/cloudsc_f2py.py b/src/cloudsc_python/drivers/cloudsc_f2py.py index a55c396b..c192dc46 100644 --- a/src/cloudsc_python/drivers/cloudsc_f2py.py +++ b/src/cloudsc_python/drivers/cloudsc_f2py.py @@ -82,14 +82,14 @@ def run_cloudsc_kernel(ngptot, nproma, input_path, reference_path): cloudsc_py ) - fields = load_input_fields(path=input_path) + fields = load_input_fields(path=input_path, ngptot=ngptot) yrecldp, yrmcst, yrethf, yrephli, yrecld = load_input_parameters(path=input_path) - cloudsc_args = { + cloudsc_args = {k.lower(): v for k, v in fields.items()} - } - cloudsc_args.update( (k.lower(), v) for k, v in fields.items() ) + # We process only one block for now, all in one go + cloudsc_args['klon'] = ngptot cloudsc_py( kidia=1, kfdia=ngptot, **cloudsc_args, @@ -97,7 +97,7 @@ def run_cloudsc_kernel(ngptot, nproma, input_path, reference_path): ) # Validate the output fields against reference data - reference = load_reference_fields(path=reference_path) + reference = load_reference_fields(path=reference_path, ngptot=ngptot) cloudsc_validate(cloudsc_args, reference, kidia=1, kfdia=ngptot) diff --git a/src/cloudsc_python/src/cloudscf2py/inputs.py b/src/cloudsc_python/src/cloudscf2py/inputs.py index 039e6a2f..d015e609 100644 --- a/src/cloudsc_python/src/cloudscf2py/inputs.py +++ b/src/cloudsc_python/src/cloudscf2py/inputs.py @@ -9,6 +9,7 @@ # nor does it submit to any jurisdiction. +import math import h5py import numpy as np @@ -19,7 +20,20 @@ NCLV = 5 # number of microphysics variables -def load_input_fields(path, transpose=False): +def expand_field(f, klon, ngptot): + """ + Expands a given field in the horizontal and replicates column data. + + Note, that this does not yet support IFS-style memory blocking (NPROMA). + """ + rank = len(f.shape) + m = math.ceil(ngptot/klon) + + f_new = np.empty_like(f, shape=f.shape[:-1] +(ngptot,)) + f_new[...] = np.tile(f, (1,)*(rank-1) + (m,))[...,:ngptot] + return f_new + +def load_input_fields(path, transpose=False, ngptot=100): """ """ fields = OrderedDict() @@ -43,28 +57,29 @@ def load_input_fields(path, transpose=False): for argname in argnames: fields[argname] = np.ascontiguousarray(f[argname]) - - fields['TENDENCY_LOC_A'] = np.ndarray(order="C", shape=(klev, klon)) - fields['TENDENCY_LOC_T'] = np.ndarray(order="C", shape=(klev, klon)) - fields['TENDENCY_LOC_Q'] = np.ndarray(order="C", shape=(klev, klon)) - fields['TENDENCY_LOC_CLD'] = np.ndarray(order="C", shape=(NCLV, klev, klon)) - fields['PCOVPTOT'] = np.ndarray(order="C", shape=(klev, klon)) - fields['PRAINFRAC_TOPRFZ'] = np.ndarray(order="C", shape=(klon,)) - - fields['PFSQLF'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFSQIF'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFCQNNG'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFCQLNG'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFSQRF'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFSQSF'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFCQRNG'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFCQSNG'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFSQLTUR'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFSQITUR'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFPLSL'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFPLSN'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFHPSL'] = np.ndarray(order="C", shape=(klev+1, klon)) - fields['PFHPSN'] = np.ndarray(order="C", shape=(klev+1, klon)) + fields[argname] = expand_field(fields[argname], klon, ngptot=ngptot) + + fields['TENDENCY_LOC_A'] = np.ndarray(order="C", shape=(klev, ngptot)) + fields['TENDENCY_LOC_T'] = np.ndarray(order="C", shape=(klev, ngptot)) + fields['TENDENCY_LOC_Q'] = np.ndarray(order="C", shape=(klev, ngptot)) + fields['TENDENCY_LOC_CLD'] = np.ndarray(order="C", shape=(NCLV, klev, ngptot)) + fields['PCOVPTOT'] = np.ndarray(order="C", shape=(klev, ngptot)) + fields['PRAINFRAC_TOPRFZ'] = np.ndarray(order="C", shape=(ngptot,)) + + fields['PFSQLF'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFSQIF'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFCQNNG'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFCQLNG'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFSQRF'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFSQSF'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFCQRNG'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFCQSNG'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFSQLTUR'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFSQITUR'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFPLSL'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFPLSN'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFHPSL'] = np.ndarray(order="C", shape=(klev+1, ngptot)) + fields['PFHPSN'] = np.ndarray(order="C", shape=(klev+1, ngptot)) return fields @@ -143,7 +158,7 @@ class TECLD: return yrecldp, yrmcst, yrethf, yrephli, yrecld -def load_reference_fields(path): +def load_reference_fields(path, ngptot=100): """ """ fields = OrderedDict() @@ -157,7 +172,11 @@ def load_reference_fields(path): ] with h5py.File(path, 'r') as f: + fields['KLON'] = f['KLON'][0] + klon = fields['KLON'] + for argname in argnames: fields[argname.lower()] = np.ascontiguousarray(f[argname]) + fields[argname.lower()] = expand_field(fields[argname.lower()], klon, ngptot=ngptot) return fields From 2cefedc3319054143bd12faa8c15dd9dae5d6fcb Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 11 May 2023 04:04:06 +0000 Subject: [PATCH 033/174] Python-f2py: Fix CI builder setup for new python variant (again) --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5bca7bd0..5b634e3c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,7 +46,7 @@ jobs: pyiface_flag: [''] # Flag to enable Python-interface variant - python_flag: [''] # Flag to enable Python variants + python_f2py_flag: [''] # Flag to enable Python variants include: # Add nvhpc build configurations with serialbox and HDF5 @@ -58,7 +58,7 @@ jobs: cuda_flag: '--with-cuda' loki_flag: '--with-loki' pyiface_flag: '' - python_flag: '' + python_f2py_flag: '' - arch: github/ubuntu/nvhpc/21.9 io_library_flag: '--with-serialbox' mpi_flag: '' @@ -67,7 +67,7 @@ jobs: cuda_flag: '--with-cuda' loki_flag: '--with-loki' pyiface_flag: '' - python_flag: '' + python_f2py_flag: '' # Add pyiface build configuration for HDF5 only - arch: github/ubuntu/gnu/9.4.0 io_library_flag: '' @@ -77,7 +77,7 @@ jobs: cuda_flag: '' loki_flag: '' pyiface_flag: '--cloudsc-fortran-pyiface=ON' - python_flag: '--cloudsc-python-f2py=ON' + python_f2py_flag: '--cloudsc-python-f2py=ON' # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -136,7 +136,7 @@ jobs: loki_flag: ${{ matrix.loki_flag }} claw_flag: ${{ matrix.claw_flag }} pyiface_flag: ${{ matrix.pyiface_flag }} - python_flag: ${{ matrix.python_f2py_flag }} + python_f2py_flag: ${{ matrix.python_f2py_flag }} run: .github/scripts/verify-targets.sh # Run double-precision targets From 72a5f0242d803ea0f52f5f697e2f44065226558c Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 11 May 2023 06:54:47 +0000 Subject: [PATCH 034/174] Python-f2py: More CI config fixes --- .github/scripts/run-targets.sh | 2 +- .github/scripts/verify-targets.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh index ac802ee2..1cfa9f11 100755 --- a/.github/scripts/run-targets.sh +++ b/.github/scripts/run-targets.sh @@ -51,7 +51,7 @@ do bin/$target --numomp 1 --ngptot 100 --nproma 64 elif [[ "$target" == "cloudsc_f2py.py" ]] then - bin/$target --nthreads 1 --ngptot 100 --nproma 64 + bin/$target --ngptot 100 --nproma 128 else # Single thread, safe NPROMA bin/$target 1 100 64 diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 7024e32b..0cdc6b8e 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -56,7 +56,7 @@ then targets+=(cloudsc_pyiface.py) fi -if [[ "$python_flag" == "--cloudsc-python-f2py=ON" ]] +if [[ "$python_f2py_flag" == "--cloudsc-python-f2py=ON" ]] then targets+=(cloudsc_f2py.py) fi From a868e37107ee96c94f629529d5076cedd3f11e7e Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Fri, 12 May 2023 08:45:53 +0000 Subject: [PATCH 035/174] Python-gt4py: Removing GT4Py-based implementation of CLOUDSC Due to licensing issues we need to remove this. Hopefully this can be re-introduced soon. --- src/cloudsc_python/README.md | 48 - src/cloudsc_python/bootstrap_venv.sh | 72 - src/cloudsc_python/drivers/__init__.py | 9 - src/cloudsc_python/drivers/config.py | 177 -- src/cloudsc_python/drivers/run.py | 204 -- src/cloudsc_python/drivers/run_batch.sh | 115 - src/cloudsc_python/drivers/run_fortran.py | 143 -- src/cloudsc_python/drivers/run_split.py | 139 - src/cloudsc_python/drivers/utils.py | 57 - src/cloudsc_python/pyproject.toml | 10 +- src/cloudsc_python/requirements.txt | 8 - src/cloudsc_python/requirements_dev.txt | 8 - src/cloudsc_python/src/cloudsc4py/__init__.py | 26 - .../src/cloudsc4py/framework/__init__.py | 9 - .../src/cloudsc4py/framework/components.py | 169 -- .../src/cloudsc4py/framework/config.py | 71 - .../src/cloudsc4py/framework/grid.py | 86 - .../src/cloudsc4py/framework/stencil.py | 82 - .../src/cloudsc4py/framework/storage.py | 173 -- .../src/cloudsc4py/initialization/__init__.py | 9 - .../cloudsc4py/initialization/reference.py | 118 - .../src/cloudsc4py/initialization/state.py | 142 - .../src/cloudsc4py/initialization/utils.py | 49 - .../src/cloudsc4py/physics/__init__.py | 12 - .../cloudsc4py/physics/_stencils/__init__.py | 17 - .../cloudsc4py/physics/_stencils/cloudsc.py | 2186 ---------------- .../physics/_stencils/cloudsc_split.py | 2279 ----------------- .../cloudsc4py/physics/_stencils/cuadjtq.py | 40 - .../src/cloudsc4py/physics/_stencils/fccld.py | 25 - .../cloudsc4py/physics/_stencils/fcttre.py | 83 - .../cloudsc4py/physics/_stencils/helpers.py | 269 -- .../src/cloudsc4py/physics/cloudsc.py | 227 -- .../src/cloudsc4py/physics/cloudsc_split.py | 318 --- .../src/cloudsc4py/utils/__init__.py | 9 - .../src/cloudsc4py/utils/f2py.py | 48 - .../src/cloudsc4py/utils/iox.py | 328 --- .../src/cloudsc4py/utils/numpyx.py | 38 - .../src/cloudsc4py/utils/timing.py | 30 - .../src/cloudsc4py/utils/typingx.py | 28 - .../src/cloudsc4py/utils/validation.py | 58 - 40 files changed, 2 insertions(+), 7917 deletions(-) delete mode 100644 src/cloudsc_python/README.md delete mode 100755 src/cloudsc_python/bootstrap_venv.sh delete mode 100644 src/cloudsc_python/drivers/__init__.py delete mode 100644 src/cloudsc_python/drivers/config.py delete mode 100644 src/cloudsc_python/drivers/run.py delete mode 100755 src/cloudsc_python/drivers/run_batch.sh delete mode 100644 src/cloudsc_python/drivers/run_fortran.py delete mode 100644 src/cloudsc_python/drivers/run_split.py delete mode 100644 src/cloudsc_python/drivers/utils.py delete mode 100644 src/cloudsc_python/requirements.txt delete mode 100644 src/cloudsc_python/requirements_dev.txt delete mode 100644 src/cloudsc_python/src/cloudsc4py/__init__.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/framework/__init__.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/framework/components.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/framework/config.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/framework/grid.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/framework/stencil.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/framework/storage.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/initialization/__init__.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/initialization/reference.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/initialization/state.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/initialization/utils.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/__init__.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/__init__.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/f2py.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/iox.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/numpyx.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/timing.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/typingx.py delete mode 100644 src/cloudsc_python/src/cloudsc4py/utils/validation.py diff --git a/src/cloudsc_python/README.md b/src/cloudsc_python/README.md deleted file mode 100644 index a464d81c..00000000 --- a/src/cloudsc_python/README.md +++ /dev/null @@ -1,48 +0,0 @@ -This folder contains a Python implementation of the CLOUDSC microphysics scheme based on -[GT4Py](https://github.com/GridTools/gt4py/tree/master). The code is bundled as an installable -package called `cloudsc4py`, whose source code is placed under `src/`. - -We strongly recommend installing the package in an isolated virtual environment, which can be -created by issuing the following command from within this directory: -```shell -$ python -m venv venv -``` -The virtual environment will be contained in the folder `venv/` and can be activated with -```shell -$ source venv/bin/activate -``` -and deactivated with -```shell -$ (venv) deactivate -``` -The package `cloudsc4py` can be installed via the Python package manager [pip](https://pypi.org/project/pip/): -```shell -$ (venv) pip install -e . -``` -The resulting installation will work on CPU only. To get access to the GPU-accelerated backends of -GT4Py, [CuPy](https://cupy.dev/) is required. We suggest installing CuPy as a precompiled binary -package (wheel) -```shell -$ (venv) pip install cupy-cudaXXX # XXX stands for the CUDA version available on the system -``` -If the installation of CuPy completed successfully, the command -```shell -$ (venv) python -c "import cupy" -``` -should produce no output. -All the aforementioned steps can be executed in a single shot by executing the Bash script `bootstrap_venv.sh`: -```shell -$ FRESH_INSTALL=1 VENV=venv INSTALL_CUPY=1 CUPY_VERSION=cupy-cudaXXX [PIP_UPGRADE=1 INSTALL_PRE_COMMIT=1] ./bootstrap_venv.sh -``` - -The scheme comes in two forms: one where computations are carried out in a single stencil -(see `src/cloudsc4py/{physics,_stencils}/cloudsc.py`), and one where calculations are split into two -stencils (one computing tendencies on the main vertical levels, the other computing fluxes at the -interface levels; see `src/cloudsc4py/{physics,_stencils}/cloudsc_split.py`). - -The easiest way to run the dwarf is through the driver scripts `drivers/run.py` and `drivers/run_split.py`. -Run the two scripts with `--help` option to get the full list of command-line options. - -For the sake of convenience, we provide the driver `drivers/run_fortran.py` to invoke one of the -FORTRAN variants of the dwarf from Python, and the Bash script `drivers/run_batch.sh` to run the -FORTRAN and Python implementations under different settings. diff --git a/src/cloudsc_python/bootstrap_venv.sh b/src/cloudsc_python/bootstrap_venv.sh deleted file mode 100755 index b93e8c2c..00000000 --- a/src/cloudsc_python/bootstrap_venv.sh +++ /dev/null @@ -1,72 +0,0 @@ -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#!/bin/bash - -PYTHON=$(which python3) -PIP_UPGRADE=${PIP_UPGRADE:-1} -VENV=${VENV:-venv} -FRESH_INSTALL=${FRESH_INSTALL:-1} -INSTALL_PRE_COMMIT=${INSTALL_PRE_COMMIT:-1} -INSTALL_CUPY=${INSTALL_CUPY:-0} -CUPY_VERSION=${CUPY_VERSION:-cupy} - - -function install() -{ - # activate environment - source "$VENV"/bin/activate - - # upgrade pip and setuptools - if [ "$PIP_UPGRADE" -ne 0 ]; then - pip install --upgrade pip setuptools wheel - fi - - # install cloudsc4py - pip install -e . - - # setup gt4py cache - mkdir -p gt_cache - echo -e "\nexport GT_CACHE_ROOT=$PWD/gt_cache" >> "$VENV"/bin/activate - - # install cupy - if [ "$INSTALL_CUPY" -eq 1 ]; then - pip install "$CUPY_VERSION" - fi - - # install development packages - pip install -r requirements_dev.txt - - # install pre-commit - if [ "$INSTALL_PRE_COMMIT" -eq 1 ]; then - pre-commit install - fi - - # deactivate environment - deactivate -} - - -if [ "$FRESH_INSTALL" -eq 1 ]; then - echo -e "Creating new environment..." - rm -rf "$VENV" - $PYTHON -m venv "$VENV" -fi - - -install || deactivate - - -echo -e "" -echo -e "Command to activate environment:" -echo -e "\t\$ source $VENV/bin/activate" -echo -e "" -echo -e "Command to deactivate environment:" -echo -e "\t\$ deactivate" -echo -e "" diff --git a/src/cloudsc_python/drivers/__init__.py b/src/cloudsc_python/drivers/__init__.py deleted file mode 100644 index 95e3c8ad..00000000 --- a/src/cloudsc_python/drivers/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. diff --git a/src/cloudsc_python/drivers/config.py b/src/cloudsc_python/drivers/config.py deleted file mode 100644 index ac7e4e5d..00000000 --- a/src/cloudsc_python/drivers/config.py +++ /dev/null @@ -1,177 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -import numpy as np -from os.path import dirname, join, normpath, splitext -from pydantic import BaseModel, validator -import socket -from typing import Literal, Optional - -from cloudsc4py.framework.config import DataTypes, GT4PyConfig - - -class IOConfig(BaseModel): - """Gathers options for I/O.""" - - output_csv_file: Optional[str] - host_name: Optional[str] - - @validator("output_csv_file") - @classmethod - def check_extension(cls, v: Optional[str]) -> Optional[str]: - if v is None: - return v - - basename, extension = splitext(v) - if extension == "": - return v + ".csv" - elif extension == ".csv": - return v - else: - return basename + ".csv" - - @validator("host_name") - @classmethod - def set_host_name(cls, v: Optional[str]) -> str: - return v or socket.gethostname() - - def with_host_name(self, host_name: str) -> IOConfig: - args = self.dict() - args["host_name"] = host_name - return IOConfig(**args) - - def with_output_csv_file(self, output_csv_file: str) -> IOConfig: - args = self.dict() - args["output_csv_file"] = output_csv_file - return IOConfig(**args) - - -default_io_config = IOConfig(output_file=None, host_name=None) - - -class PythonConfig(BaseModel): - """Gathers options controlling execution of Python/GT4Py code.""" - - # domain - num_cols: Optional[int] - - # validation - enable_validation: bool - input_file: str - reference_file: str - - # run - num_runs: int - - # low-level and/or backend-related - data_types: DataTypes - gt4py_config: GT4PyConfig - sympl_enable_checks: bool - - @validator("gt4py_config") - @classmethod - def add_dtypes(cls, v, values) -> GT4PyConfig: - return v.with_dtypes(values["data_types"]) - - def with_backend(self, backend: Optional[str]) -> PythonConfig: - args = self.dict() - args["gt4py_config"] = GT4PyConfig(**args["gt4py_config"]).with_backend(backend).dict() - return PythonConfig(**args) - - def with_checks(self, enabled: bool) -> PythonConfig: - args = self.dict() - args["gt4py_config"] = ( - GT4PyConfig(**args["gt4py_config"]).with_validate_args(enabled).dict() - ) - args["sympl_enable_checks"] = enabled - return PythonConfig(**args) - - def with_num_cols(self, num_cols: Optional[int]) -> PythonConfig: - args = self.dict() - if num_cols is not None: - args["num_cols"] = num_cols - return PythonConfig(**args) - - def with_num_runs(self, num_runs: Optional[int]) -> PythonConfig: - args = self.dict() - if num_runs is not None: - args["num_runs"] = num_runs - return PythonConfig(**args) - - def with_precision(self, precision: Literal["double", "single"]) -> PythonConfig: - args = self.dict() - args["data_types"] = self.data_types.with_precision(precision) - return PythonConfig(**args) - - def with_validation(self, enabled: bool) -> PythonConfig: - args = self.dict() - args["enable_validation"] = enabled - return PythonConfig(**args) - - -config_files_dir = normpath(join(dirname(__file__), "../../../config-files")) -default_python_config = PythonConfig( - num_cols=1, - enable_validation=True, - input_file=join(config_files_dir, "input.h5"), - reference_file=join(config_files_dir, "reference.h5"), - num_runs=15, - data_types=DataTypes(bool=bool, float=np.float64, int=np.int64), - gt4py_config=GT4PyConfig(backend="numpy", rebuild=False, validate_args=True, verbose=True), - sympl_enable_checks=True, -) - - -class FortranConfig(BaseModel): - """Gathers options controlling execution of FORTRAN code.""" - - build_dir: str - variant: str - nproma: int - num_cols: int - num_runs: int - num_threads: int - - def with_build_dir(self, build_dir: str) -> FortranConfig: - args = self.dict() - args["build_dir"] = build_dir - return FortranConfig(**args) - - def with_nproma(self, nproma: int) -> FortranConfig: - args = self.dict() - args["nproma"] = nproma - return FortranConfig(**args) - - def with_num_cols(self, num_cols: int) -> FortranConfig: - args = self.dict() - args["num_cols"] = num_cols - return FortranConfig(**args) - - def with_num_runs(self, num_runs: int) -> FortranConfig: - args = self.dict() - args["num_runs"] = num_runs - return FortranConfig(**args) - - def with_num_threads(self, num_threads: int) -> FortranConfig: - args = self.dict() - args["num_threads"] = num_threads - return FortranConfig(**args) - - def with_variant(self, variant: str) -> FortranConfig: - args = self.dict() - args["variant"] = variant - return FortranConfig(**args) - - -default_fortran_config = FortranConfig( - build_dir=".", variant="fortran", nproma=32, num_cols=1, num_runs=1, num_threads=1 -) diff --git a/src/cloudsc_python/drivers/run.py b/src/cloudsc_python/drivers/run.py deleted file mode 100644 index 7a844f86..00000000 --- a/src/cloudsc_python/drivers/run.py +++ /dev/null @@ -1,204 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import click -import csv -import datetime -import os -from typing import Optional, Type - -from cloudsc4py.framework.grid import ComputationalGrid -from cloudsc4py.physics.cloudsc import Cloudsc -from cloudsc4py.initialization.reference import get_reference_tendencies, get_reference_diagnostics -from cloudsc4py.initialization.state import get_state -from cloudsc4py.utils.iox import HDF5Reader -from cloudsc4py.utils.timing import timing -from cloudsc4py.utils.validation import validate - -from config import PythonConfig, IOConfig, default_python_config, default_io_config -from utils import print_performance, to_csv - - -def core(config: PythonConfig, io_config: IOConfig, cloudsc_cls: Type) -> None: - hdf5_reader = HDF5Reader(config.input_file, config.data_types) - - nx = config.num_cols or hdf5_reader.get_nlon() - nz = hdf5_reader.get_nlev() - computational_grid = ComputationalGrid(nx, 1, nz) - - state = get_state(computational_grid, hdf5_reader, gt4py_config=config.gt4py_config) - dt = hdf5_reader.get_timestep() - - yoecldp_paramaters = hdf5_reader.get_yoecldp_parameters() - yoethf_parameters = hdf5_reader.get_yoethf_parameters() - yomcst_parameters = hdf5_reader.get_yomcst_parameters() - yrecldp_parameters = hdf5_reader.get_yrecldp_parameters() - - cloudsc = cloudsc_cls( - computational_grid, - yoecldp_paramaters, - yoethf_parameters, - yomcst_parameters, - yrecldp_parameters, - enable_checks=config.sympl_enable_checks, - gt4py_config=config.gt4py_config, - ) - tends, diags = cloudsc(state, dt) - - runtimes = [] - for i in range(config.num_runs): - with timing(f"run_{i}") as timer: - cloudsc(state, dt, out_tendencies=tends, out_diagnostics=diags) - runtimes.append(timer.get_time(f"run_{i}")) - - runtime_mean, runtime_stddev = print_performance(runtimes) - - if io_config.output_csv_file is not None: - to_csv( - io_config.output_csv_file, - io_config.host_name, - config.gt4py_config.backend, - nx, - config.num_runs, - runtime_mean, - runtime_stddev, - ) - - if config.enable_validation: - hdf5_reader_ref = HDF5Reader(config.reference_file, config.data_types) - tends_ref = get_reference_tendencies( - computational_grid, hdf5_reader_ref, gt4py_config=config.gt4py_config - ) - diags_ref = get_reference_diagnostics( - computational_grid, hdf5_reader_ref, gt4py_config=config.gt4py_config - ) - - tends_fail = validate(tends, tends_ref) - if len(tends_fail) == 0: - print("Results: All tendencies have been successfully validated. HOORAY!") - else: - print( - f"Results: Validation failed for {len(tends_fail)}/{len(tends_ref) - 1} " - f"tendencies: {', '.join(tends_fail)}." - ) - - diags_fail = validate(diags, diags_ref) - if len(diags_fail) == 0: - print("Results: All diagnostics have been successfully validated. HOORAY!") - else: - print( - f"Results: Validation failed for {len(diags_fail)}/{len(diags_ref) - 1} " - f"diagnostics: {', '.join(diags_fail)}." - ) - - -@click.command() -@click.option( - "--backend", - type=str, - default=None, - help="GT4Py backend." - "\n\nOptions: numpy, gt:cpu_kfirst, gt:cpu_ifirst, gt:gpu, cuda, dace:cpu, dace:gpu." - "\n\nDefault: numpy.", -) -@click.option( - "--enable-checks/--disable-checks", - is_flag=True, - type=bool, - default=False, - help="Enable/disable sanity checks performed by Sympl and GT4Py.\n\nDefault: enabled.", -) -@click.option( - "--enable-validation/--disable-validation", - is_flag=True, - type=bool, - default=True, - help="Enable/disable data validation.\n\nDefault: enabled.", -) -@click.option("--num-cols", type=int, default=None, help="Number of domain columns.\n\nDefault: 1.") -@click.option( - "--num-runs", - type=int, - default=1, - help="Number of executions.\n\nDefault: 1.", -) -@click.option( - "--precision", - type=str, - default="double", - help="Select either `double` (default) or `single` precision.", -) -@click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).") -@click.option( - "--output-csv-file", - type=str, - default=None, - help="Path to the CSV file where writing performance counters (optional).", -) -@click.option( - "--output-csv-file-stencils", - type=str, - default=None, - help="Path to the CSV file where writing performance counters for each stencil (optional).", -) -def main( - backend: Optional[str], - enable_checks: bool, - enable_validation: bool, - num_cols: Optional[int], - num_runs: Optional[int], - precision: str, - host_alias: Optional[str], - output_csv_file: Optional[str], - output_csv_file_stencils: Optional[str], -) -> None: - """ - Driver for the GT4Py-based implementation of CLOUDSC. - - Computations are carried out in a single stencil. - """ - config = ( - default_python_config.with_backend(backend) - .with_checks(enable_checks) - .with_validation(enable_validation) - .with_num_cols(num_cols) - .with_num_runs(num_runs) - .with_precision(precision) - ) - io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias) - core(config, io_config, cloudsc_cls=Cloudsc) - - if output_csv_file_stencils is not None: - call_time = None - for key, value in config.gt4py_config.exec_info.items(): - if "cloudsc" in key: - call_time = value["total_call_time"] * 1000 / config.num_runs - - if not os.path.exists(output_csv_file_stencils): - with open(output_csv_file_stencils, "w") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow(("date", "host", "backend", "num_cols", "num_runs", "cloudsc")) - with open(output_csv_file_stencils, "a") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow( - ( - datetime.date.today().strftime("%Y%m%d"), - io_config.host_name, - config.gt4py_config.backend, - config.num_cols, - config.num_runs, - call_time, - ) - ) - - -if __name__ == "__main__": - main() diff --git a/src/cloudsc_python/drivers/run_batch.sh b/src/cloudsc_python/drivers/run_batch.sh deleted file mode 100755 index aaf609a7..00000000 --- a/src/cloudsc_python/drivers/run_batch.sh +++ /dev/null @@ -1,115 +0,0 @@ -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -#!/bin/bash - -# === general -# name of the host machine -HOST=meluxina -# list of number of columns -NUM_COLS_L=( 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 ) - -# === FORTRAN -# list of environments -# options: nvhpc -FORTRAN_ENV_L=( ) -# list of variants -# options: fortran, gpu-scc, gpu-scc-hoist, gpu-omp-scc-hoist -FORTRAN_VARIANT_L=( fortran gpu-scc gpu-scc-hoist gpu-omp-scc-hoist ) -# list of NPROMA values (array must have the same length of FORTRAN_VARIANT_L) -# recommended values: 32 for CPUs, 128 on GPUs -NPROMA_L=( 32 128 128 128 ) -# list of number of threads (array must have the same length of FORTRAN_VARIANT_L) -# recommended values: 24 on Piz Daint's CPUs, 128 on MLux's CPUs, 1 on GPUs -FORTRAN_NUM_THREADS_L=( 128 1 1 1 ) - -# === python -# list of environments -# options: aocc gcc intel -PYTHON_ENV_L=( aocc gcc intel ) -# list of C compilers (array must have the same length of PYTHON_ENV_L) -CC_L=( clang gcc icx ) -# list of C++ compilers (array must have the same length of PYTHON_ENV_L) -CXX_L=( clang++ g++ icx ) -# list of C++ compiler flags (array must have the same length of PYTHON_ENV_L) -CXXFLAGS_L=( "-fbracket-depth=1024" "" "-fbracket-depth=1024" ) -# list of linker flags (array must have the same length of PYTHON_ENV_L) -LFLAGS_L=( "" "" "-lstdc++" ) -# list of GT4Py backends -# options: numpy, gt:cpu_ifirst, gt:cpu_kfirst, gt:gpu, cuda, dace:cpu, dace:gpu -GT4PY_BACKEND_L=( gt:cpu_ifirst gt:cpu_kfirst dace:cpu ) -# list of number of threads (array must have the same length of GT4PY_BACKEND_L) -# recommended values: 24 on Piz Daint, 128 on MLux -PYTHON_NUM_THREADS_L=( 128 128 128 128 128 ) - -echo "FORTRAN: start" -LEN_FORTRAN_ENV_L=${#FORTRAN_ENV_L[@]} -LEN_FORTRAN_VARIANT_L=${#FORTRAN_VARIANT_L[@]} - -for (( i=0; i<"$LEN_FORTRAN_ENV_L"; i++ )); do - ENV=${FORTRAN_ENV_L[$i]} - echo " Env: $ENV: start" - - for (( j=0; j<"$LEN_FORTRAN_VARIANT_L"; j++ )); do - VARIANT=${FORTRAN_VARIANT_L[$j]} - mkdir -p ../data/"$HOST"/"$ENV" - echo " Variant: $VARIANT: start" - for NUM_COLS in "${NUM_COLS_L[@]}"; do - echo -n " num_cols=$NUM_COLS: " - python run_fortran.py \ - --build-dir=../../../../develop/build/"$ENV" \ - --nproma="${NPROMA_L[$j]}" \ - --num-runs=20 \ - --num-threads="${FORTRAN_NUM_THREADS_L[$j]}" \ - --output-csv-file=../data/"$HOST"/"$ENV"/performance.csv \ - --host-alias="$HOST" \ - --variant="$VARIANT" \ - --num-cols="$NUM_COLS" || true - done - echo " Variant: $FORTRAN_MODE: end" - done - echo " Env: $ENV: end" -done -echo "FORTRAN: end" - -echo "" - -echo "Python: start" -LEN_PYTHON_ENV_L=${#PYTHON_ENV_L[@]} -LEN_GT4PY_BACKEND_L=${#GT4PY_BACKEND_L[@]} - -for (( i=0; i<"$LEN_PYTHON_ENV_L"; i++ )); do - ENV=${PYTHON_ENV_L[$i]} - echo " Env: $ENV: start" - export GT_CACHE_ROOT=$PWD/../gt_cache/"$ENV" - mkdir -p ../data/"$HOST"/"$ENV" - - for (( j=0; j<"$LEN_GT4PY_BACKEND_L"; j++ )); do - GT4PY_BACKEND=${GT4PY_BACKEND_L[$j]} - echo " Backend: $GT4PY_BACKEND: start" - - for NUM_COLS in "${NUM_COLS_L[@]}"; do - echo -n " num_cols=$NUM_COLS: " - OMP_NUM_THREADS=${PYTHON_NUM_THREADS_L[$j]} \ - CC=${CC_L[$i]} CXX=${CXX_L[$i]} CXXFLAGS=${CXXFLAGS_L[$i]} LFLAGS=${LFLAGS_L[$i]} CUDA_HOST_CXX=${CXX_L[$i]} \ - python run_split.py \ - --num-runs=20 \ - --disable-checks \ - --disable-validation \ - --host-alias="$HOST" \ - --backend="$GT4PY_BACKEND" \ - --num-cols="$NUM_COLS" \ - --output-csv-file=../data/"$HOST"/"$ENV"/performance_split.csv \ - --output-csv-file-stencils=../data/"$HOST"/"$ENV"/performance_split_stencils.csv || true - done - echo " Backend: $GT4PY_BACKEND: end" - done - echo " Env: $ENV: end" -done -echo "Python: end" diff --git a/src/cloudsc_python/drivers/run_fortran.py b/src/cloudsc_python/drivers/run_fortran.py deleted file mode 100644 index 5f97543e..00000000 --- a/src/cloudsc_python/drivers/run_fortran.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import click -import os -import subprocess -from typing import Optional - -from config import FortranConfig, IOConfig, default_fortran_config, default_io_config -from utils import print_performance, to_csv - - -def core(config: FortranConfig, io_config: IOConfig) -> None: - executable = os.path.join( - os.path.dirname(__file__), config.build_dir, f"bin/dwarf-cloudsc-{config.variant}" - ) - if not os.path.exists(executable): - raise RuntimeError(f"The executable `{executable}` does not exist.") - - # warm-up cache - _ = subprocess.run( - [ - executable, - str(config.num_threads), - str(config.num_cols), - str(min(config.num_cols, config.nproma)), - ], - capture_output=True, - ) - - # run and profile - runtimes = [] - for _ in range(config.num_runs): - out = subprocess.run( - [ - executable, - str(config.num_threads), - str(config.num_cols), - str(min(config.num_cols, config.nproma)), - ], - capture_output=True, - ) - if "gpu" in config.variant: - x = out.stderr.decode("utf-8").split("\n")[2] - y = x.split(" ") - z = [c for c in y if c != ""] - runtimes.append(float(z[-4])) - else: - x = out.stderr.decode("utf-8").split("\n")[-2] - y = x.split(" ") - z = [c for c in y if c != ""] - runtimes.append(float(z[-4])) - - runtime_mean, runtime_stddev = print_performance(runtimes) - - if io_config.output_csv_file is not None: - to_csv( - io_config.output_csv_file, - io_config.host_name, - config.variant, - config.num_cols, - config.num_runs, - runtime_mean, - runtime_stddev, - ) - - -@click.command() -@click.option( - "--build-dir", - type=str, - default="fortran", - help="Path to the build directory of the FORTRAN dwarf.", -) -@click.option( - "--variant", - type=str, - default="fortran", - help="Code variant." - "\n\nOptions: fortran, gpu-scc, gpu-scc-hoist, gpu-omp-scc-hoist." - "\n\nDefault: fortran.", -) -@click.option( - "--nproma", - type=int, - default=32, - help="Block size.\n\nRecommended values: 32 on CPUs, 128 on GPUs.\n\nDefault: 32.", -) -@click.option("--num-cols", type=int, default=1, help="Number of domain columns.\n\nDefault: 1.") -@click.option( - "--num-runs", - type=int, - default=1, - help="Number of executions.\n\nDefault: 1.", -) -@click.option( - "--num-threads", - type=int, - default=1, - help="Number of threads." - "\n\nRecommended values: 24 on Piz Daint's CPUs, 128 on MLux's CPUs, 1 on GPUs." - "\n\nDefault: 1.", -) -@click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).") -@click.option( - "--output-csv-file", - type=str, - default=None, - help="Path to the CSV file where writing performance counters (optional).", -) -def main( - build_dir: str, - variant: str, - nproma: int, - num_cols: int, - num_runs: int, - num_threads: int, - host_alias: Optional[str], - output_csv_file: Optional[str], -) -> None: - """Driver for the FORTRAN implementation of CLOUDSC.""" - config = ( - default_fortran_config.with_build_dir(build_dir) - .with_variant(variant) - .with_nproma(nproma) - .with_num_cols(num_cols) - .with_num_runs(num_runs) - .with_num_threads(num_threads) - ) - io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias) - core(config, io_config) - - -if __name__ == "__main__": - main() diff --git a/src/cloudsc_python/drivers/run_split.py b/src/cloudsc_python/drivers/run_split.py deleted file mode 100644 index f29c9b5f..00000000 --- a/src/cloudsc_python/drivers/run_split.py +++ /dev/null @@ -1,139 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import click -import csv -import datetime -import os -from typing import Optional - -from cloudsc4py.physics.cloudsc_split import Cloudsc - -from config import default_python_config, default_io_config -from run import core - - -@click.command() -@click.option( - "--backend", - type=str, - default=None, - help="GT4Py backend." - "\n\nOptions: numpy, gt:cpu_kfirst, gt:cpu_ifirst, gt:gpu, cuda, dace:cpu, dace:gpu." - "\n\nDefault: numpy.", -) -@click.option( - "--enable-checks/--disable-checks", - is_flag=True, - type=bool, - default=False, - help="Enable/disable sanity checks performed by Sympl and GT4Py.\n\nDefault: enabled.", -) -@click.option( - "--enable-validation/--disable-validation", - is_flag=True, - type=bool, - default=True, - help="Enable/disable data validation.\n\nDefault: enabled.", -) -@click.option("--num-cols", type=int, default=None, help="Number of domain columns.\n\nDefault: 1.") -@click.option( - "--num-runs", - type=int, - default=1, - help="Number of executions.\n\nDefault: 1.", -) -@click.option( - "--precision", - type=str, - default="double", - help="Select either `double` (default) or `single` precision.", -) -@click.option("--host-alias", type=str, default=None, help="Name of the host machine (optional).") -@click.option( - "--output-csv-file", - type=str, - default=None, - help="Path to the CSV file where writing performance counters (optional).", -) -@click.option( - "--output-csv-file-stencils", - type=str, - default=None, - help="Path to the CSV file where writing performance counters for each stencil (optional).", -) -def main( - backend: Optional[str], - enable_checks: bool, - enable_validation: bool, - num_cols: Optional[int], - num_runs: Optional[int], - precision: str, - host_alias: Optional[str], - output_csv_file: Optional[str], - output_csv_file_stencils: Optional[str], -) -> None: - """ - Driver for the GT4Py-based implementation of CLOUDSC. - - Computations are split into two stencils. - """ - config = ( - default_python_config.with_backend(backend) - .with_checks(enable_checks) - .with_validation(enable_validation) - .with_num_cols(num_cols) - .with_num_runs(num_runs) - .with_precision(precision) - ) - io_config = default_io_config.with_output_csv_file(output_csv_file).with_host_name(host_alias) - core(config, io_config, cloudsc_cls=Cloudsc) - - if output_csv_file_stencils is not None: - cloudsc_tendencies_call_time = None - cloudsc_fluxes_call_time = None - for key, value in config.gt4py_config.exec_info.items(): - if "tendencies" in key: - cloudsc_tendencies_call_time = value["total_call_time"] * 1000 / config.num_runs - elif "fluxes" in key: - cloudsc_fluxes_call_time = value["total_call_time"] * 1000 / config.num_runs - - if not os.path.exists(output_csv_file_stencils): - with open(output_csv_file_stencils, "w") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow( - ( - "date", - "host", - "backend", - "num_cols", - "num_runs", - "cloudsc_tendencies", - "cloudsc_fluxes", - ) - ) - with open(output_csv_file_stencils, "a") as f: - writer = csv.writer(f, delimiter=",") - writer.writerow( - ( - datetime.date.today().strftime("%Y%m%d"), - io_config.host_name, - config.gt4py_config.backend, - config.num_cols, - config.num_runs, - cloudsc_tendencies_call_time, - cloudsc_fluxes_call_time, - ) - ) - - -if __name__ == "__main__": - main() diff --git a/src/cloudsc_python/drivers/utils.py b/src/cloudsc_python/drivers/utils.py deleted file mode 100644 index 96bef382..00000000 --- a/src/cloudsc_python/drivers/utils.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -import csv -import datetime -import os -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from typing import Tuple - - -def to_csv( - output_file: str, - host_name: str, - variant: str, - num_cols: int, - num_runs: int, - runtime_mean: float, - runtime_stddev: float, -) -> None: - """Write mean and standard deviation of measured runtimes to a CSV file.""" - if not os.path.exists(output_file): - with open(output_file, "w") as csv_file: - writer = csv.writer(csv_file, delimiter=",") - writer.writerow(("date", "host", "variant", "num_cols", "num_runs", "mean", "stddev")) - with open(output_file, "a") as csv_file: - writer = csv.writer(csv_file, delimiter=",") - writer.writerow( - ( - datetime.date.today().strftime("%Y%m%d"), - host_name, - variant, - num_cols, - num_runs, - runtime_mean, - runtime_stddev, - ) - ) - - -def print_performance(runtimes: list[float]) -> Tuple[float, float]: - """Print means and standard deviation of measure runtimes to screen.""" - n = len(runtimes) - mean = sum(runtimes) / n - stddev = (sum((runtime - mean) ** 2 for runtime in runtimes) / (n - 1 if n > 1 else n)) ** 0.5 - print(f"Performance: Average runtime over {n} runs: {mean:.3f} \u00B1 {stddev:.3f} ms.") - return mean, stddev diff --git a/src/cloudsc_python/pyproject.toml b/src/cloudsc_python/pyproject.toml index aa29e770..b39513e6 100644 --- a/src/cloudsc_python/pyproject.toml +++ b/src/cloudsc_python/pyproject.toml @@ -3,10 +3,9 @@ requires = ["setuptools >= 64"] build-backend = "setuptools.build_meta" [project] -name = "cloudsc4py" +name = "cloudscpython" version = "0.1.0" authors = [ - {name = "Stefano Ubbiali", email = "subbiali@phys.ethz.ch"}, {name = "Michael Lange", email = "michael.lange@ecmwf.int"} ] description = "Collection of Python variants of the CLOUDSC dwarf" @@ -27,13 +26,8 @@ classifiers = [ ] dependencies = [ "click", - "gt4py[dace] >= 1.0.1", "h5py", "numpy", - "pandas", - "pydantic", - "sympl @ git+https://github.com/stubbiali/sympl.git@oop#egg=sympl", - "xarray", ] [project.scripts] @@ -44,4 +38,4 @@ repository = "https://github.com/ecmwf-ifs/dwarf-p-cloudsc" [tool.setuptools.packages.find] where = ["src", "."] -include = ["cloudsc4py", "cloudscf2py", "drivers*"] +include = ["cloudscf2py", "drivers*"] diff --git a/src/cloudsc_python/requirements.txt b/src/cloudsc_python/requirements.txt deleted file mode 100644 index c2a4a4b8..00000000 --- a/src/cloudsc_python/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -click -gt4py[dace]>=1.0.1 -h5py -numpy -pandas -pydantic -sympl@git+https://github.com/stubbiali/sympl.git@oop#egg=sympl -xarray diff --git a/src/cloudsc_python/requirements_dev.txt b/src/cloudsc_python/requirements_dev.txt deleted file mode 100644 index 3876e777..00000000 --- a/src/cloudsc_python/requirements_dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -black >= 22.6.0 -flake8 -ipdb -ipython -matplotlib -mypy -pre-commit -pytest diff --git a/src/cloudsc_python/src/cloudsc4py/__init__.py b/src/cloudsc_python/src/cloudsc4py/__init__.py deleted file mode 100644 index fe3b92a3..00000000 --- a/src/cloudsc_python/src/cloudsc4py/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import os - -import gt4py.cartesian.config as gt_config - -import cloudsc4py.physics - - -# customize compilation/linking of GT4Py generated code -cxxflags = os.environ.get("CXXFLAGS", "") -if cxxflags != "": - gt_config.build_settings["extra_compile_args"]["cxx"] += cxxflags.split(" ") - -lflags = os.environ.get("LFLAGS", "") -if lflags != "": - gt_config.build_settings["extra_link_args"] += lflags.split(" ") diff --git a/src/cloudsc_python/src/cloudsc4py/framework/__init__.py b/src/cloudsc_python/src/cloudsc4py/framework/__init__.py deleted file mode 100644 index 95e3c8ad..00000000 --- a/src/cloudsc_python/src/cloudsc4py/framework/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. diff --git a/src/cloudsc_python/src/cloudsc4py/framework/components.py b/src/cloudsc_python/src/cloudsc4py/framework/components.py deleted file mode 100644 index a1e9c1bd..00000000 --- a/src/cloudsc_python/src/cloudsc4py/framework/components.py +++ /dev/null @@ -1,169 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from abc import abstractmethod -from functools import cached_property -from typing import Optional, TYPE_CHECKING - -from sympl._core.core_components import ( - DiagnosticComponent as SymplDiagnosticComponent, - ImplicitTendencyComponent as SymplImplicitTendencyComponent, -) - -from cloudsc4py.framework.config import GT4PyConfig -from cloudsc4py.framework.stencil import compile_stencil -from cloudsc4py.framework.storage import get_data_shape_from_name, get_dtype_from_name, zeros - -if TYPE_CHECKING: - from typing import Any, Dict - - from gt4py.cartesian import StencilObject - from sympl._core.typingx import PropertyDict - - from cloudsc4py.framework.grid import ComputationalGrid - from cloudsc4py.utils.typingx import Storage - - -class ComputationalGridComponent: - """Model component defined over a computational grid.""" - - def __init__(self, computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig) -> None: - self.computational_grid = computational_grid - self.gt4py_config = gt4py_config - - def compile_stencil( - self, name: str, externals: Optional[Dict[str, Any]] = None - ) -> StencilObject: - return compile_stencil(name, self.gt4py_config, externals) - - def fill_properties_with_dims(self, properties: PropertyDict) -> PropertyDict: - for field_name, field_prop in properties.items(): - field_prop["dims"] = self.computational_grid.grids[field_prop["grid"]].dims - return properties - - def allocate(self, name: str, properties: PropertyDict) -> Storage: - data_shape = get_data_shape_from_name(name) - dtype = get_dtype_from_name(name) - return zeros( - self.computational_grid, - properties[name]["grid"], - data_shape, - gt4py_config=self.gt4py_config, - dtype=dtype, - ) - - -class DiagnosticComponent(ComputationalGridComponent, SymplDiagnosticComponent): - """Grid-aware variant of Sympl's ``DiagnosticComponent``.""" - - def __init__( - self, - computational_grid: ComputationalGrid, - *, - enable_checks: bool = True, - gt4py_config: GT4PyConfig, - ) -> None: - super().__init__(computational_grid, gt4py_config=gt4py_config) - super(ComputationalGridComponent, self).__init__(enable_checks=enable_checks) - - @cached_property - def input_properties(self) -> PropertyDict: - return self.fill_properties_with_dims(self._input_properties) - - @abstractmethod - @cached_property - def _input_properties(self) -> PropertyDict: - """ - Dictionary where each key is the name of an input field, and the corresponding value is a - dictionary specifying the units for that field ('units') and the identifier of the grid over - which it is defined ('grid'). - """ - ... - - def allocate_diagnostic(self, name: str) -> Storage: - return self.allocate(name, self.diagnostic_properties) - - @cached_property - def diagnostic_properties(self) -> PropertyDict: - return self.fill_properties_with_dims(self._diagnostic_properties) - - @abstractmethod - @cached_property - def _diagnostic_properties(self) -> PropertyDict: - """ - Dictionary where each key is the name of a field diagnosed by the component, and the - corresponding value is a dictionary specifying the units for that field ('units') and the - identifier of the grid over which it is defined ('grid'). - """ - ... - - -class ImplicitTendencyComponent(ComputationalGridComponent, SymplImplicitTendencyComponent): - """Grid-aware variant of Sympl's ``ImplicitTendencyComponent``.""" - - def __init__( - self, - computational_grid: ComputationalGrid, - *, - enable_checks: bool = True, - gt4py_config: GT4PyConfig, - ) -> None: - super().__init__(computational_grid, gt4py_config=gt4py_config) - super(ComputationalGridComponent, self).__init__(enable_checks=enable_checks) - - @cached_property - def input_properties(self) -> PropertyDict: - return self.fill_properties_with_dims(self._input_properties) - - @abstractmethod - @cached_property - def _input_properties(self) -> PropertyDict: - """ - Dictionary where each key is the name of an input field, and the corresponding value is a - dictionary specifying the units for that field ('units') and the identifier of the grid over - which it is defined ('grid'). - """ - ... - - def allocate_tendency(self, name: str) -> Storage: - return self.allocate(name, self.tendency_properties) - - @cached_property - def tendency_properties(self) -> PropertyDict: - return self.fill_properties_with_dims(self._tendency_properties) - - @abstractmethod - @cached_property - def _tendency_properties(self) -> PropertyDict: - """ - Dictionary where each key is the name of a tendency field computed by the component, and the - corresponding value is a dictionary specifying the units for that field ('units') and the - identifier of the grid over which it is defined ('grid'). - """ - ... - - def allocate_diagnostic(self, name: str) -> Storage: - return self.allocate(name, self.diagnostic_properties) - - @cached_property - def diagnostic_properties(self) -> PropertyDict: - return self.fill_properties_with_dims(self._diagnostic_properties) - - @abstractmethod - @cached_property - def _diagnostic_properties(self) -> PropertyDict: - """ - Dictionary where each key is the name of a field diagnosed by the component, and the - corresponding value is a dictionary specifying the units for that field ('units') and the - identifier of the grid over which it is defined ('grid'). - """ - ... diff --git a/src/cloudsc_python/src/cloudsc4py/framework/config.py b/src/cloudsc_python/src/cloudsc4py/framework/config.py deleted file mode 100644 index b4607e84..00000000 --- a/src/cloudsc_python/src/cloudsc4py/framework/config.py +++ /dev/null @@ -1,71 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -import numpy as np -from pydantic import BaseModel, validator -from typing import Any, Dict, Optional, Union, Type - - -class DataTypes(BaseModel): - """Specify the datatypes for bool, float and integer fields.""" - - bool: Type - float: Type - int: Type - - def with_precision(self, precision: Literal["double", "single"]) -> DataTypes: - if precision == "double": - return DataTypes(bool=bool, float=np.float64, int=np.int64) - elif precision == "single": - return DataTypes(bool=bool, float=np.float32, int=np.int32) - else: - raise ValueError("Either `double` or `single` precision supported.") - - -class GT4PyConfig(BaseModel): - """Gather options controlling the compilation and execution of the code generated by GT4Py.""" - - backend: str - backend_opts: Dict[str, Any] = {} - build_info: Optional[Dict[str, Any]] = None - device_sync: bool = True - dtypes: DataTypes = DataTypes(bool=bool, float=float, int=int) - exec_info: Optional[Dict[str, Any]] = None - managed: Union[bool, str] = "gt4py" - rebuild: bool = False - validate_args: bool = False - verbose: bool = True - - @validator("exec_info") - @classmethod - def set_exec_info(cls, v: Optional[Dict[str, Any]]) -> Dict[str, Any]: - v = v or {} - return {**v, "__aggregate_data": True} - - def reset_exec_info(self): - self.exec_info = {"__aggregate_data": self.exec_info.get("__aggregate_data", True)} - - def with_backend(self, backend: Optional[str]) -> GT4PyConfig: - args = self.dict() - if backend is not None: - args["backend"] = backend - return GT4PyConfig(**args) - - def with_dtypes(self, dtypes: DataTypes) -> GT4PyConfig: - args = self.dict() - args["dtypes"] = dtypes - return GT4PyConfig(**args) - - def with_validate_args(self, flag: bool) -> GT4PyConfig: - args = self.dict() - args["validate_args"] = flag - return GT4PyConfig(**args) diff --git a/src/cloudsc_python/src/cloudsc4py/framework/grid.py b/src/cloudsc_python/src/cloudsc4py/framework/grid.py deleted file mode 100644 index 1503eb18..00000000 --- a/src/cloudsc_python/src/cloudsc4py/framework/grid.py +++ /dev/null @@ -1,86 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from functools import cached_property -import numpy as np -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from typing import Dict, Tuple - - -class DimSymbol: - """Symbol identifying a dimension, e.g. I or I-1/2.""" - - _instances: Dict[int, DimSymbol] = {} - - name: str - offset: float - - def __new__(cls, *args) -> DimSymbol: - key = hash(args) - if key not in cls._instances: - cls._instances[key] = super().__new__(cls) - return cls._instances[key] - - def __init__(self, name: str, offset: float) -> None: - self.name = name - self.offset = offset - - def __add__(self, other: float) -> DimSymbol: - return DimSymbol(self.name, self.offset + other) - - def __sub__(self, other: float) -> DimSymbol: - return self + (-other) - - def __repr__(self) -> str: - if self.offset > 0: - return f"{self.name} + {self.offset}" - elif self.offset < 0: - return f"{self.name} - {-self.offset}" - else: - return f"{self.name}" - - -I = DimSymbol("I", 0) -J = DimSymbol("J", 0) -K = DimSymbol("K", 0) - - -class Grid: - """Grid of points.""" - - def __init__( - self, shape: Tuple[int, ...], dims: Tuple[str, ...], storage_shape: Tuple[int, ...] = None - ) -> None: - assert len(shape) == len(dims) - self.shape = shape - self.dims = dims - self.storage_shape = storage_shape or self.shape - - @cached_property - def coords(self) -> Tuple[np.ndarray, ...]: - return tuple(np.arange(size) for size in self.storage_shape) - - -class ComputationalGrid: - """A three-dimensional computational grid consisting of mass and staggered grid points.""" - - grids: Dict[Tuple[DimSymbol, ...], Grid] - - def __init__(self, nx: int, ny: int, nz: int) -> None: - self.grids = { - (I, J, K): Grid((nx, ny, nz), ("x", "y", "z"), (nx, ny, nz + 1)), - (I, J, K - 1 / 2): Grid((nx, ny, nz + 1), ("x", "y", "z_h")), - (I, J): Grid((nx, ny), ("x", "y")), - (K,): Grid((nz,), ("z",), (nz + 1,)), - } diff --git a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py b/src/cloudsc_python/src/cloudsc4py/framework/stencil.py deleted file mode 100644 index 4517262e..00000000 --- a/src/cloudsc_python/src/cloudsc4py/framework/stencil.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from typing import TYPE_CHECKING - -from gt4py.cartesian import gtscript - -if TYPE_CHECKING: - from typing import Any, Dict - - from gt4py.cartesian import StencilObject - - from cloudsc4py.framework.config import GT4PyConfig - - -FUNCTION_COLLECTION = {} -STENCIL_COLLECTION = {} - - -def function_collection(name: str): - """Decorator for GT4Py functions.""" - if name in FUNCTION_COLLECTION: - raise RuntimeError(f"Another function called `{name}` found.") - - def core(definition): - FUNCTION_COLLECTION[name] = {"definition": definition} - return definition - - return core - - -def stencil_collection(name: str): - """Decorator for GT4Py stencil definitions.""" - if name in STENCIL_COLLECTION: - raise RuntimeError(f"Another stencil called `{name}` found.") - - def core(definition): - STENCIL_COLLECTION[name] = {"definition": definition} - return definition - - return core - - -def compile_stencil( - name: str, - gt4py_config: GT4PyConfig, - externals: Dict[str, Any] = None, -) -> StencilObject: - """Automate and customize the compilation of GT4Py stencils.""" - stencil_info = STENCIL_COLLECTION.get(name, None) - if stencil_info is None: - raise RuntimeError(f"Unknown stencil `{name}`.") - definition = stencil_info["definition"] - - dtypes = gt4py_config.dtypes.dict() - dtypes[float] = gt4py_config.dtypes.float - dtypes[int] = gt4py_config.dtypes.int - externals = externals or {} - - kwargs = gt4py_config.backend_opts.copy() - if gt4py_config.backend not in ("debug", "numpy", "gtc:numpy"): - kwargs["verbose"] = gt4py_config.verbose - - return gtscript.stencil( - gt4py_config.backend, - definition, - name=name, - build_info=gt4py_config.build_info, - dtypes=dtypes, - externals=externals, - rebuild=gt4py_config.rebuild, - **kwargs, - ) diff --git a/src/cloudsc_python/src/cloudsc4py/framework/storage.py b/src/cloudsc_python/src/cloudsc4py/framework/storage.py deleted file mode 100644 index 2887b5af..00000000 --- a/src/cloudsc_python/src/cloudsc4py/framework/storage.py +++ /dev/null @@ -1,173 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from contextlib import contextmanager -import numpy as np -from typing import TYPE_CHECKING - -import gt4py -from sympl._core.data_array import DataArray - -if TYPE_CHECKING: - from typing import Dict, List, Literal, Optional, Tuple - - from cloudsc4py.framework.config import GT4PyConfig - from cloudsc4py.framework.grid import ComputationalGrid, DimSymbol - from cloudsc4py.utils.typingx import Storage - - -def zeros( - computational_grid: ComputationalGrid, - grid_id: Tuple[DimSymbol, ...], - data_shape: Optional[Tuple[int, ...]] = None, - *, - gt4py_config: GT4PyConfig, - dtype: Literal["bool", "float", "int"], -) -> Storage: - """ - Create an array defined over the grid ``grid_id`` of ``computational_grid`` - and fill it with zeros. - - Relying on GT4Py utilities to optimally allocate memory based on the chosen backend. - """ - grid = computational_grid.grids[grid_id] - data_shape = data_shape or () - shape = grid.storage_shape + data_shape - dtype = gt4py_config.dtypes.dict()[dtype] - return gt4py.storage.zeros(shape, dtype, backend=gt4py_config.backend) - - -def get_data_array( - buffer: Storage, - computational_grid: ComputationalGrid, - grid_id: Tuple[DimSymbol, ...], - units: str, - data_dims: Optional[Tuple[str, ...]] = None, -) -> DataArray: - """Create a ``DataArray`` out of ``buffer``.""" - grid = computational_grid.grids[grid_id] - data_dims = data_dims or () - dims = grid.dims + data_dims - coords = grid.coords + tuple( - np.arange(data_size) for data_size in buffer.shape[len(grid.dims) :] - ) - return DataArray(buffer, dims=dims, coords=coords, attrs={"units": units}) - - -def allocate_data_array( - computational_grid: ComputationalGrid, - grid_id: Tuple[DimSymbol, ...], - units: str, - data_shape: Optional[Tuple[int, ...]] = None, - data_dims: Optional[Tuple[str, ...]] = None, - *, - gt4py_config: GT4PyConfig, - dtype: Literal["bool", "float", "int"], -) -> DataArray: - """ - Create a ``DataArray`` defined over the grid ``grid_id`` of ``computational_grid`` - and fill it with zeros. - """ - buffer = zeros( - computational_grid, grid_id, data_shape=data_shape, gt4py_config=gt4py_config, dtype=dtype - ) - return get_data_array(buffer, computational_grid, grid_id, units, data_dims=data_dims) - - -def get_dtype_from_name(field_name: str) -> str: - """ - Retrieve the datatype of a field from its name. - - Assume that the name of a bool field is of the form 'b_{some_name}', - the name of a float field is of the form 'f_{some_name}', - and the name of an integer field is of the form 'i_{some_name}'. - """ - if field_name.startswith("b"): - return "bool" - elif field_name.startswith("f"): - return "float" - elif field_name.startswith("i"): - return "int" - else: - raise RuntimeError(f"Cannot retrieve dtype for field `{field_name}`.") - - -def get_data_shape_from_name(field_name: str) -> Tuple[int, ...]: - """ - Retrieve the data dimension of a field from its name. - - Assume that the name of an n-dimensional field, with n > 1, is '{some_name}_n'. - """ - data_dims = field_name.split("_", maxsplit=1)[0][1:] - out = tuple(int(c) for c in data_dims) - return out - - -TEMPORARY_STORAGE_POOL: Dict[int, List[Storage]] = {} - - -@contextmanager -def managed_temporary_storage( - computational_grid: ComputationalGrid, - *args: Tuple[Tuple[DimSymbol, ...], Literal["bool", "float", "int"]], - gt4py_config: GT4PyConfig, -): - """ - Get temporary storages defined over the grids of ``computational_grid``. - - Each ``arg`` is a tuple where the first element specifies the grid identifier, and the second - element specifies the datatype. - - The storages are either created on-the-fly, or retrieved from ``TEMPORARY_STORAGE_POOL`` - if available. On exit, all storages are included in ``TEMPORARY_STORAGE_POOL`` for later use. - """ - grid_hashes = [] - storages = [] - for grid_id, dtype in args: - grid = computational_grid.grids[grid_id] - grid_hash = hash((grid.shape + grid_id, dtype)) - pool = TEMPORARY_STORAGE_POOL.setdefault(grid_hash, []) - if len(pool) > 0: - storage = pool.pop() - else: - storage = zeros(computational_grid, grid_id, gt4py_config=gt4py_config, dtype=dtype) - grid_hashes.append(grid_hash) - storages.append(storage) - - try: - if len(storages) == 1: - yield storages[0] - else: - yield storages - finally: - for grid_hash, storage in zip(grid_hashes, storages): - TEMPORARY_STORAGE_POOL[grid_hash].append(storage) - - -@contextmanager -def managed_temporary_storage_pool(): - """ - Clear the pool of temporary storages ``TEMPORARY_STORAGE_POOL`` on entry and exit. - - Useful when running multiple simulations using different backends within the same session. - All simulations using the same backend should be wrapped by this context manager. - """ - try: - TEMPORARY_STORAGE_POOL.clear() - yield None - finally: - for grid_hash, storages in TEMPORARY_STORAGE_POOL.items(): - num_storages = len(storages) - for _ in range(num_storages): - storage = storages.pop() - del storage - TEMPORARY_STORAGE_POOL.clear() diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/__init__.py b/src/cloudsc_python/src/cloudsc4py/initialization/__init__.py deleted file mode 100644 index 95e3c8ad..00000000 --- a/src/cloudsc_python/src/cloudsc4py/initialization/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/reference.py b/src/cloudsc_python/src/cloudsc4py/initialization/reference.py deleted file mode 100644 index e2c5804a..00000000 --- a/src/cloudsc_python/src/cloudsc4py/initialization/reference.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from datetime import datetime -from functools import partial -from typing import TYPE_CHECKING - -from cloudsc4py.framework.grid import I, J, K -from cloudsc4py.framework.storage import allocate_data_array -from cloudsc4py.initialization.utils import initialize_field - -if TYPE_CHECKING: - from typing import Literal, Tuple - - from sympl._core.data_array import DataArray - from sympl._core.typingx import DataArrayDict - - from cloudsc4py.framework.config import GT4PyConfig - from cloudsc4py.framework.grid import ComputationalGrid, DimSymbol - from cloudsc4py.utils.iox import HDF5Reader - - -def allocate_tendencies( - computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig -) -> DataArrayDict: - def allocate(units: str = "") -> DataArray: - return allocate_data_array( - computational_grid, (I, J, K), units, gt4py_config=gt4py_config, dtype="float" - ) - - return { - "time": datetime(year=2022, month=1, day=1), - "f_a": allocate(), - "f_qi": allocate(), - "f_ql": allocate(), - "f_qr": allocate(), - "f_qs": allocate(), - "f_qv": allocate(), - "f_t": allocate(), - } - - -def initialize_tendencies(tendencies: DataArrayDict, hdf5_reader: HDF5Reader) -> None: - hdf5_reader_keys = {"f_a": "TENDENCY_LOC_A", "f_qv": "TENDENCY_LOC_Q", "f_t": "TENDENCY_LOC_T"} - for name, hdf5_reader_key in hdf5_reader_keys.items(): - buffer = hdf5_reader.get_field(hdf5_reader_key) - initialize_field(tendencies[name], buffer) - - cld = hdf5_reader.get_field("TENDENCY_LOC_CLD") - for idx, name in enumerate(("f_ql", "f_qi", "f_qr", "f_qs")): - initialize_field(tendencies[name], cld[..., idx]) - - -def allocate_diagnostics( - computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig -) -> DataArrayDict: - def _allocate( - grid_id: Tuple[DimSymbol, ...], units: str, dtype: Literal["bool", "float", "int"] - ) -> DataArray: - return allocate_data_array( - computational_grid, grid_id, units, gt4py_config=gt4py_config, dtype=dtype - ) - - allocate = partial(_allocate, grid_id=(I, J, K), units="", dtype="float") - allocate_h = partial(_allocate, grid_id=(I, J, K - 1 / 2), units="", dtype="float") - allocate_ij = partial(_allocate, grid_id=(I, J), units="", dtype="float") - - return { - "time": datetime(year=2022, month=1, day=1), - "f_covptot": allocate(), - "f_fcqlng": allocate_h(), - "f_fcqnng": allocate_h(), - "f_fcqrng": allocate_h(), - "f_fcqsng": allocate_h(), - "f_fhpsl": allocate_h(), - "f_fhpsn": allocate_h(), - "f_fplsl": allocate_h(), - "f_fplsn": allocate_h(), - "f_fsqif": allocate_h(), - "f_fsqitur": allocate_h(), - "f_fsqlf": allocate_h(), - "f_fsqltur": allocate_h(), - "f_fsqrf": allocate_h(), - "f_fsqsf": allocate_h(), - "f_rainfrac_toprfz": allocate_ij(), - } - - -def initialize_diagnostics(diagnostics: DataArrayDict, hdf5_reader: HDF5Reader) -> None: - hdf5_reader_keys = {name: "P" + name[2:].upper() for name in diagnostics if name != "time"} - for name, hdf5_reader_key in hdf5_reader_keys.items(): - buffer = hdf5_reader.get_field(hdf5_reader_key) - initialize_field(diagnostics[name], buffer) - - -def get_reference_tendencies( - computational_grid: ComputationalGrid, hdf5_reader: HDF5Reader, *, gt4py_config: GT4PyConfig -) -> DataArrayDict: - tendencies = allocate_tendencies(computational_grid, gt4py_config=gt4py_config) - initialize_tendencies(tendencies, hdf5_reader) - return tendencies - - -def get_reference_diagnostics( - computational_grid: ComputationalGrid, hdf5_reader: HDF5Reader, *, gt4py_config: GT4PyConfig -) -> DataArrayDict: - diagnostics = allocate_diagnostics(computational_grid, gt4py_config=gt4py_config) - initialize_diagnostics(diagnostics, hdf5_reader) - return diagnostics diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/state.py b/src/cloudsc_python/src/cloudsc4py/initialization/state.py deleted file mode 100644 index 0e743e11..00000000 --- a/src/cloudsc_python/src/cloudsc4py/initialization/state.py +++ /dev/null @@ -1,142 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from datetime import datetime -from functools import partial -from typing import TYPE_CHECKING - -from cloudsc4py.framework.grid import I, J, K -from cloudsc4py.framework.storage import allocate_data_array -from cloudsc4py.initialization.utils import initialize_field - -if TYPE_CHECKING: - from typing import Literal, Tuple - - from sympl._core.data_array import DataArray - from sympl._core.typingx import DataArrayDict - - from cloudsc4py.framework.config import GT4PyConfig - from cloudsc4py.framework.grid import ComputationalGrid, DimSymbol - from cloudsc4py.utils.iox import HDF5Reader - - -def allocate_state( - computational_grid: ComputationalGrid, *, gt4py_config: GT4PyConfig -) -> DataArrayDict: - def _allocate( - grid_id: Tuple[DimSymbol, ...], units: str, dtype: Literal["bool", "float", "int"] - ) -> DataArray: - return allocate_data_array( - computational_grid, grid_id, units, gt4py_config=gt4py_config, dtype=dtype - ) - - allocate_b_ij = partial(_allocate, grid_id=(I, J), units="", dtype="bool") - allocate_f = partial(_allocate, grid_id=(I, J, K), units="", dtype="float") - allocate_f_h = partial(_allocate, grid_id=(I, J, K - 1 / 2), units="", dtype="float") - allocate_f_ij = partial(_allocate, grid_id=(I, J), units="", dtype="float") - allocate_i_ij = partial(_allocate, grid_id=(I, J), units="", dtype="int") - - return { - "time": datetime(year=2022, month=1, day=1), - "b_convection_on": allocate_b_ij(), - "f_a": allocate_f(), - "f_ap": allocate_f(), - "f_aph": allocate_f_h(), - "f_ccn": allocate_f(), - "f_dyni": allocate_f(), - "f_dynl": allocate_f(), - "f_hrlw": allocate_f(), - "f_hrsw": allocate_f(), - "f_icrit_aer": allocate_f(), - "f_lcrit_aer": allocate_f(), - "f_lsm": allocate_f_ij(), - "f_lu": allocate_f(), - "f_lude": allocate_f(), - "f_mfd": allocate_f(), - "f_mfu": allocate_f(), - "f_nice": allocate_f(), - "f_qi": allocate_f(), - "f_ql": allocate_f(), - "f_qr": allocate_f(), - "f_qs": allocate_f(), - "f_qv": allocate_f(), - "f_re_ice": allocate_f(), - "f_snde": allocate_f(), - "f_supsat": allocate_f(), - "f_t": allocate_f(), - "f_tnd_tmp_a": allocate_f(), - "f_tnd_tmp_qi": allocate_f(), - "f_tnd_tmp_ql": allocate_f(), - "f_tnd_tmp_qr": allocate_f(), - "f_tnd_tmp_qs": allocate_f(), - "f_tnd_tmp_qv": allocate_f(), - "f_tnd_tmp_t": allocate_f(), - "f_vfa": allocate_f(), - "f_vfi": allocate_f(), - "f_vfl": allocate_f(), - "f_w": allocate_f(), - "i_convection_type": allocate_i_ij(), - } - - -def initialize_state(state: DataArrayDict, hdf5_reader: HDF5Reader) -> None: - hdf5_reader_keys = { - "b_convection_on": "LDCUM", - "f_a": "PA", - "f_ap": "PAP", - "f_aph": "PAPH", - "f_ccn": "PCCN", - "f_dyni": "PDYNI", - "f_dynl": "PDYNL", - "f_hrlw": "PHRLW", - "f_hrsw": "PHRSW", - "f_icrit_aer": "PICRIT_AER", - "f_lcrit_aer": "PLCRIT_AER", - "f_lsm": "PLSM", - "f_lu": "PLU", - "f_lude": "PLUDE", - "f_mfd": "PMFD", - "f_mfu": "PMFU", - "f_nice": "PNICE", - "f_qv": "PQ", - "f_re_ice": "PRE_ICE", - "f_snde": "PSNDE", - "f_supsat": "PSUPSAT", - "f_t": "PT", - "f_tnd_tmp_a": "TENDENCY_TMP_A", - "f_tnd_tmp_qv": "TENDENCY_TMP_Q", - "f_tnd_tmp_t": "TENDENCY_TMP_T", - "f_vfa": "PVFA", - "f_vfi": "PVFI", - "f_vfl": "PVFL", - "f_w": "PVERVEL", - "i_convection_type": "KTYPE", - } - for name, hdf5_reader_key in hdf5_reader_keys.items(): - buffer = hdf5_reader.get_field(hdf5_reader_key) - initialize_field(state[name], buffer) - - clv = hdf5_reader.get_field("PCLV") - for idx, name in enumerate(("f_ql", "f_qi", "f_qr", "f_qs")): - initialize_field(state[name], clv[..., idx]) - - tnd_tmp_cld = hdf5_reader.get_field("TENDENCY_TMP_CLD") - for idx, name in enumerate(("f_tnd_tmp_ql", "f_tnd_tmp_qi", "f_tnd_tmp_qr", "f_tnd_tmp_qs")): - initialize_field(state[name], tnd_tmp_cld[..., idx]) - - -def get_state( - computational_grid: ComputationalGrid, hdf5_reader: HDF5Reader, *, gt4py_config: GT4PyConfig -) -> DataArrayDict: - state = allocate_state(computational_grid, gt4py_config=gt4py_config) - initialize_state(state, hdf5_reader) - return state diff --git a/src/cloudsc_python/src/cloudsc4py/initialization/utils.py b/src/cloudsc_python/src/cloudsc4py/initialization/utils.py deleted file mode 100644 index f0fef9c0..00000000 --- a/src/cloudsc_python/src/cloudsc4py/initialization/utils.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -import numpy as np -from typing import TYPE_CHECKING - -from cloudsc4py.utils.numpyx import assign - -if TYPE_CHECKING: - from sympl._core.data_array import DataArray - - from cloudsc4py.utils.typingx import Storage - - -def initialize_storage_2d(storage: Storage, buffer: np.ndarray) -> None: - ni = storage.shape[0] - mi = buffer.size - nb = ni // mi - for b in range(nb): - assign(storage[b * mi : (b + 1) * mi, 0:1], buffer[:, np.newaxis]) - assign(storage[nb * mi :, 0:1], buffer[: ni - nb * mi, np.newaxis]) - - -def initialize_storage_3d(storage: Storage, buffer: np.ndarray) -> None: - ni, _, nk = storage.shape - mi, mk = buffer.shape - lk = min(nk, mk) - nb = ni // mi - for b in range(nb): - assign(storage[b * mi : (b + 1) * mi, 0:1, :lk], buffer[:, np.newaxis, :lk]) - assign(storage[nb * mi :, 0:1, :lk], buffer[: ni - nb * mi, np.newaxis, :lk]) - - -def initialize_field(field: DataArray, buffer: np.ndarray) -> None: - if field.ndim == 2: - initialize_storage_2d(field.data, buffer) - elif field.ndim == 3: - initialize_storage_3d(field.data, buffer) - else: - raise ValueError("The field to initialize must be either 2-d or 3-d.") diff --git a/src/cloudsc_python/src/cloudsc4py/physics/__init__.py b/src/cloudsc_python/src/cloudsc4py/physics/__init__.py deleted file mode 100644 index 7a356af6..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import cloudsc4py.physics._stencils diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py deleted file mode 100644 index 03f5582b..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import cloudsc4py.physics._stencils.cloudsc -import cloudsc4py.physics._stencils.cloudsc_split -import cloudsc4py.physics._stencils.cuadjtq -import cloudsc4py.physics._stencils.fccld -import cloudsc4py.physics._stencils.fcttre -import cloudsc4py.physics._stencils.helpers diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py deleted file mode 100644 index 23f2c9c8..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc.py +++ /dev/null @@ -1,2186 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations - -from gt4py.cartesian.gtscript import Field, IJ, K - -from cloudsc4py.framework.stencil import stencil_collection -from cloudsc4py.physics._stencils.cuadjtq import f_cuadjtq -from cloudsc4py.physics._stencils.fccld import f_fokoop -from cloudsc4py.physics._stencils.fcttre import ( - f_foealfa, - f_foedelta, - f_foedem, - f_foeeice, - f_foeeliq, - f_foeewm, - f_foeldcpm, -) -from cloudsc4py.physics._stencils.helpers import f_helper_0, f_helper_1 - - -@stencil_collection("cloudsc") -def cloudsc( - in_a: Field["float"], - in_ap: Field["float"], - in_aph: Field["float"], # staggered - in_ccn: Field["float"], - in_convection_on: Field[IJ, "bool"], - in_convection_type: Field[IJ, "int"], - in_hrlw: Field["float"], - in_hrsw: Field["float"], - in_icrit_aer: Field["float"], - in_lcrit_aer: Field["float"], - in_lsm: Field[IJ, "float"], - in_lu: Field["float"], - in_lude: Field["float"], - in_mfd: Field["float"], - in_mfu: Field["float"], - in_nice: Field["float"], - in_qi: Field["float"], - in_ql: Field["float"], - in_qr: Field["float"], - in_qs: Field["float"], - in_qv: Field["float"], - in_re_ice: Field["float"], - in_snde: Field["float"], - in_supsat: Field["float"], - in_t: Field["float"], - in_tnd_tmp_a: Field["float"], - in_tnd_tmp_qi: Field["float"], - in_tnd_tmp_ql: Field["float"], - in_tnd_tmp_qr: Field["float"], - in_tnd_tmp_qs: Field["float"], - in_tnd_tmp_qv: Field["float"], - in_tnd_tmp_t: Field["float"], - in_vfi: Field["float"], - in_vfl: Field["float"], - in_w: Field["float"], - out_covptot: Field["float"], - out_fcqlng: Field["float"], # staggered - out_fcqnng: Field["float"], # staggered - out_fcqrng: Field["float"], # staggered - out_fcqsng: Field["float"], # staggered - out_fhpsl: Field["float"], # staggered - out_fhpsn: Field["float"], # staggered - out_fplsl: Field["float"], # staggered - out_fplsn: Field["float"], # staggered - out_fsqif: Field["float"], # staggered - out_fsqitur: Field["float"], # staggered - out_fsqlf: Field["float"], # staggered - out_fsqltur: Field["float"], # staggered - out_fsqrf: Field["float"], # staggered - out_fsqsf: Field["float"], # staggered - out_rainfrac_toprfz: Field[IJ, "float"], - out_tnd_loc_a: Field["float"], - out_tnd_loc_qi: Field["float"], - out_tnd_loc_ql: Field["float"], - out_tnd_loc_qr: Field["float"], - out_tnd_loc_qs: Field["float"], - out_tnd_loc_qv: Field["float"], - out_tnd_loc_t: Field["float"], - tmp_aph_s: Field[IJ, "float"], - tmp_cldtopdist: Field[IJ, "float"], - tmp_covpmax: Field[IJ, "float"], - tmp_covptot: Field[IJ, "float"], - tmp_klevel: Field[K, "int"], - tmp_paphd: Field[IJ, "float"], - tmp_rainliq: Field[IJ, "bool"], - tmp_trpaus: Field[IJ, "float"], - *, - dt: "float", -): - from __externals__ import ( - DEPICE, - EPSEC, - EPSILON, - EVAPRAIN, - EVAPSNOW, - FALLQI, - FALLQL, - FALLQR, - FALLQS, - FALLQV, - LAERICEAUTO, - LAERICESED, - LAERLIQAUTOLSP, - LAERLIQCOLL, - NCLDTOP, - NLEV, - NSSOPT, - PHASEQI, - PHASEQL, - PHASEQR, - PHASEQS, - PHASEQV, - R4IES, - R4LES, - R5IES, - R5LES, - RALFDCP, - RALSDCP, - RALVDCP, - RAMID, - RAMIN, - RCCN, - RCL_APB1, - RCL_APB2, - RCL_APB3, - RCL_CDENOM1, - RCL_CDENOM2, - RCL_CDENOM3, - RCL_CONST1I, - RCL_CONST1R, - RCL_CONST1S, - RCL_CONST2I, - RCL_CONST2R, - RCL_CONST2S, - RCL_CONST3I, - RCL_CONST3R, - RCL_CONST3S, - RCL_CONST4I, - RCL_CONST4R, - RCL_CONST4S, - RCL_CONST5I, - RCL_CONST5R, - RCL_CONST5S, - RCL_CONST6I, - RCL_CONST6R, - RCL_CONST6S, - RCL_CONST7S, - RCL_CONST8S, - RCL_FAC1, - RCL_FAC2, - RCL_FZRAB, - RCL_KK_cloud_num_land, - RCL_KK_cloud_num_sea, - RCL_KKAac, - RCL_KKAau, - RCL_KKBac, - RCL_KKBaun, - RCL_KKBauq, - RCLCRIT_LAND, - RCLCRIT_SEA, - RCLDIFF, - RCLDIFF_CONVI, - RCLDTOPCF, - RCOVPMIN, - RD, - RDCP, - RDENSREF, - RDEPLIQREFDEPTH, - RDEPLIQREFRATE, - RETV, - RG, - RICEINIT, - RKCONV, - RKOOPTAU, - RLCRITSNOW, - RLDCP, - RLMIN, - RLSTT, - RLVTT, - RNICE, - RPECONS, - RPRC1, - RPRECRHMAX, - RSNOWLIN1, - RSNOWLIN2, - RTAUMEL, - RTHOMO, - RTT, - RV, - RVRFACTOR, - TW1, - TW2, - TW3, - TW4, - TW5, - VQI, - VQL, - VQR, - VQS, - VQV, - WARMRAIN, - ) - - with computation(FORWARD), interval(0, 1): - # zero arrays - out_rainfrac_toprfz[0, 0] = 0.0 - tmp_cldtopdist[0, 0] = 0.0 - tmp_covpmax[0, 0] = 0.0 - tmp_covptot[0, 0] = 0.0 - tmp_paphd[0, 0] = 0.0 - tmp_rainliq[0, 0] = True - tmp_trpaus[0, 0] = 0.0 - - with computation(FORWARD), interval(0, -1): - # === 1: initial values for variables - # --- initialization of output tendencies - out_tnd_loc_t[0, 0, 0] = 0 - out_tnd_loc_a[0, 0, 0] = 0 - out_tnd_loc_ql[0, 0, 0] = 0 - out_tnd_loc_qr[0, 0, 0] = 0 - out_tnd_loc_qi[0, 0, 0] = 0 - out_tnd_loc_qs[0, 0, 0] = 0 - out_tnd_loc_qv[0, 0, 0] = 0 - - # --- non CLV initialization - t = in_t[0, 0, 0] + dt * in_tnd_tmp_t[0, 0, 0] - a = in_a[0, 0, 0] + dt * in_tnd_tmp_a[0, 0, 0] - a0 = a - - # --- initialization for CLV family - ql = in_ql[0, 0, 0] + dt * in_tnd_tmp_ql[0, 0, 0] - ql0 = ql - qi = in_qi[0, 0, 0] + dt * in_tnd_tmp_qi[0, 0, 0] - qi0 = qi - qr = in_qr[0, 0, 0] + dt * in_tnd_tmp_qr[0, 0, 0] - qr0 = qr - qs = in_qs[0, 0, 0] + dt * in_tnd_tmp_qs[0, 0, 0] - qs0 = qs - qv = in_qv[0, 0, 0] + dt * in_tnd_tmp_qv[0, 0, 0] - - # --- zero arrays - lneg_ql = 0.0 - lneg_qi = 0.0 - lneg_qr = 0.0 - lneg_qs = 0.0 - - # --- tidy up very small cloud cover or total cloud water - expr1 = ql + qi - if expr1 < RLMIN or a < RAMIN: - # evaporate small cloud liquid water amounts - lneg_ql += ql - qadj = ql / dt - out_tnd_loc_qv[0, 0, 0] += qadj - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - qv += ql - ql = 0.0 - - # evaporate small cloud ice water amounts - lneg_qi += qi - qadj = qi / dt - out_tnd_loc_qv[0, 0, 0] += qadj - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qi - qi = 0.0 - - # set cloud cover to zero - a = 0.0 - - # --- tidy up small CLV variables: ql - if ql < RLMIN: - lneg_ql += ql - qadj = ql / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQL == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQL == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += ql - ql = 0.0 - - # --- tidy up small CLV variables: qi - if qi < RLMIN: - lneg_qi += qi - qadj = qi / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQI == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQI == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qi - qi = 0.0 - - # --- tidy up small CLV variables: qr - if qr < RLMIN: - lneg_qr += qr - qadj = qr / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQR == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQR == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qr - qr = 0.0 - - # --- tidy up small CLV variables: qs - if qs < RLMIN: - lneg_qs += qs - qadj = qs / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQS == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQS == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qs - qs = 0.0 - - # --- define saturation values - # --- old *diagnostic* mixed phase saturation - foealfa = f_foealfa(t) - foeewmt = min(f_foeewm(t) / in_ap[0, 0, 0], 0.5) - qsmix = foeewmt / (1 - RETV * foeewmt) - - # --- ice saturation T < 273K - # --- liquid water saturation for T > 273K - alfa = f_foedelta(t) - foeew = min((alfa * f_foeeliq(t) + (1 - alfa) * f_foeeice(t)) / in_ap[0, 0, 0], 0.5) - qsice = foeew / (1 - RETV * foeew) - - # --- liquid water saturation - foeeliqt = min(f_foeeliq(t) / in_ap[0, 0, 0], 0.5) - qsliq = foeeliqt / (1 - RETV * foeeliqt) - - # --- ensure cloud fraction is between 0 and 1 - a = max(0, min(1, a)) - - # --- calculate liq/ice fractions (no longer a diagnostic relationship) - li = ql + qi - if li > RLMIN: - liqfrac = ql / li - icefrac = 1 - liqfrac - else: - liqfrac = 0.0 - icefrac = 0.0 - - # === 2: constants and parameters - # --- find tropopause level - with computation(FORWARD), interval(0, 1): - tmp_trpaus[0, 0] = 0.1 - tmp_paphd[0, 0] = 1 / tmp_aph_s[0, 0] - with computation(FORWARD), interval(0, -2): - sig = in_ap[0, 0, 0] * tmp_paphd[0, 0] - if sig > 0.1 and sig < 0.4 and t[0, 0, 0] > t[0, 0, 1]: - tmp_trpaus[0, 0] = sig - - # === 3: physics - # --- main vertical loop - with computation(FORWARD): - with interval(0, NCLDTOP - 1): - # --- initialize variables - lude = in_lude[0, 0, 0] - pfplsl = 0.0 - pfplsi = 0.0 - pfplsr = 0.0 - pfplss = 0.0 - pfplsv = 0.0 - qln = 0.0 - qin = 0.0 - qrn = 0.0 - qsn = 0.0 - qvn = 0.0 - anew = 0.0 - with interval(NCLDTOP - 1, -1): - # *** 3.0: initialize variables - # --- first guess microphysics - qlfg = ql - qifg = qi - qrfg = qr - qsfg = qs - qvfg = qv - - convsink_ql = 0.0 - convsink_qi = 0.0 - convsink_qr = 0.0 - convsink_qs = 0.0 - convsrce_ql = 0.0 - convsrce_qi = 0.0 - convsrce_qr = 0.0 - convsrce_qs = 0.0 - convsrce_qv = 0.0 - fallsrce_ql = 0.0 - fallsrce_qi = 0.0 - fallsrce_qr = 0.0 - fallsrce_qs = 0.0 - index1_ql = True - index1_qi = True - index1_qr = True - index1_qs = True - index1_qv = True - index3_ql_ql = False - index3_ql_qi = False - index3_ql_qr = False - index3_ql_qs = False - index3_ql_qv = False - index3_qi_ql = False - index3_qi_qi = False - index3_qi_qr = False - index3_qi_qs = False - index3_qi_qv = False - index3_qr_ql = False - index3_qr_qi = False - index3_qr_qr = False - index3_qr_qs = False - index3_qr_qv = False - index3_qs_ql = False - index3_qs_qi = False - index3_qs_qr = False - index3_qs_qs = False - index3_qs_qv = False - index3_qv_ql = False - index3_qv_qi = False - index3_qv_qr = False - index3_qv_qs = False - index3_qv_qv = False - lcust_ql = 0.0 - lcust_qi = 0.0 - lcust_qr = 0.0 - lcust_qs = 0.0 - lcust_qv = 0.0 - ldefr = 0.0 - lfinalsum = 0.0 - order_ql = -999 - order_qi = -999 - order_qr = -999 - order_qs = -999 - order_qv = -999 - psupsatsrce_ql = 0.0 - psupsatsrce_qi = 0.0 - psupsatsrce_qr = 0.0 - psupsatsrce_qs = 0.0 - qpretot = 0.0 - solab = 0.0 - solac = 0.0 - solqa_ql_ql = 0.0 - solqa_ql_qi = 0.0 - solqa_ql_qr = 0.0 - solqa_ql_qs = 0.0 - solqa_ql_qv = 0.0 - solqa_qi_ql = 0.0 - solqa_qi_qi = 0.0 - solqa_qi_qr = 0.0 - solqa_qi_qs = 0.0 - solqa_qi_qv = 0.0 - solqa_qr_ql = 0.0 - solqa_qr_qi = 0.0 - solqa_qr_qr = 0.0 - solqa_qr_qs = 0.0 - solqa_qr_qv = 0.0 - solqa_qs_ql = 0.0 - solqa_qs_qi = 0.0 - solqa_qs_qr = 0.0 - solqa_qs_qs = 0.0 - solqa_qs_qv = 0.0 - solqa_qv_ql = 0.0 - solqa_qv_qi = 0.0 - solqa_qv_qr = 0.0 - solqa_qv_qs = 0.0 - solqa_qv_qv = 0.0 - solqb_ql_ql = 0.0 - solqb_ql_qi = 0.0 - solqb_ql_qr = 0.0 - solqb_ql_qs = 0.0 - solqb_ql_qv = 0.0 - solqb_qi_ql = 0.0 - solqb_qi_qi = 0.0 - solqb_qi_qr = 0.0 - solqb_qi_qs = 0.0 - solqb_qi_qv = 0.0 - solqb_qr_ql = 0.0 - solqb_qr_qi = 0.0 - solqb_qr_qr = 0.0 - solqb_qr_qs = 0.0 - solqb_qr_qv = 0.0 - solqb_qs_ql = 0.0 - solqb_qs_qi = 0.0 - solqb_qs_qr = 0.0 - solqb_qs_qs = 0.0 - solqb_qs_qv = 0.0 - solqb_qv_ql = 0.0 - solqb_qv_qi = 0.0 - solqb_qv_qr = 0.0 - solqb_qv_qs = 0.0 - solqb_qv_qv = 0.0 - - # derived variables needed - dp = in_aph[0, 0, 1] - in_aph[0, 0, 0] - gdp = RG / dp - rho = in_ap[0, 0, 0] / (RD * t) - dtgdp = dt * gdp - rdtgdp = dp / (RG * dt) - - # --- calculate dqs/dT correction factor - # liquid - facw = R5LES / (t - R4LES) ** 2 - cor = 1 / (1 - RETV * foeeliqt) - dqsliqdt = facw * cor * qsliq - corqsliq = 1 + RALVDCP * dqsliqdt - - # ice - faci = R5IES / (t - R4IES) ** 2 - cor = 1 / (1 - RETV * foeew) - dqsicedt = faci * cor * qsice - corqsice = 1 + RALSDCP * dqsicedt - - # diagnostic mixed - fac = foealfa * facw + (1 - foealfa) * faci - cor = 1 / (1 - RETV * foeewmt) - dqsmixdt = fac * cor * qsmix - corqsmix = 1 + f_foeldcpm(t) * dqsmixdt - - # evaporation/sublimation limits - evaplimmix = max((qsmix - qv) / corqsmix, 0.0) - evaplimice = max((qsice - qv) / corqsice, 0.0) - - # --- in-cloud condensate amount - tmpa = 1 / max(a, EPSEC) - liqcld = ql * tmpa - icecld = qi * tmpa - licld = liqcld + icecld - - # --- evaporate very small amounts of liquid... - if ql < RLMIN: - solqa_qv_ql += ql - solqa_ql_qv -= ql - - # --- ...and ice - if qi < RLMIN: - solqa_qv_qi += qi - solqa_qi_qv -= qi - - # *** 3.1: ice supersaturation adjustment - # --- supersaturation limit (from Koop) - fokoop = f_fokoop(t) - - if t >= RTT or NSSOPT == 0: - fac = 1.0 - faci = 1.0 - else: - fac = a + fokoop * (1 - a) - faci = dt / RKOOPTAU - - # calculate supersaturation to add to cloud - if a > 1 - RAMIN: - supsat = max((qv - fac * qsice) / corqsice, 0.0) - else: - # calculate environmental humidity supersaturation - qp1env = (qv - a * qsice) / max(1 - a, EPSILON) - supsat = max((1 - a) * (qp1env - fac * qsice) / corqsice, 0.0) - - # --- here the supersaturation is turned into liquid water - if supsat > EPSEC: - if t > RTHOMO: - # turn supersaturation into liquid water - solqa_ql_qv += supsat - solqa_qv_ql -= supsat - # include liquid in first guess - qlfg += supsat - else: - # turn supersaturation into ice water - solqa_qi_qv += supsat - solqa_qv_qi -= supsat - # add ice to first guess for deposition term - qifg += supsat - - # increase cloud amount using RKOOPTAU timescale - solac = (1 - a) * faci - - # --- include supersaturation from previous timestep - if in_supsat[0, 0, 0] > EPSEC: - if t > RTHOMO: - # turn supersaturation into liquid water - solqa_ql_ql += in_supsat[0, 0, 0] - psupsatsrce_ql = in_supsat[0, 0, 0] - # add liquid to first guess for deposition term - qlfg += in_supsat[0, 0, 0] - else: - # turn supersaturation into ice water - solqa_qi_qi += in_supsat[0, 0, 0] - psupsatsrce_qi = in_supsat[0, 0, 0] - # add ice to first guess for deposition term - qifg += in_supsat[0, 0, 0] - - # increase cloud amount using RKOOPTAU timescale - solac = (1 - a) * faci - - # *** 3.2: detrainment from convection - if tmp_klevel[0] < NLEV - 1: - lude = in_lude[0, 0, 0] * dtgdp - - if in_convection_on[0, 0] and lude[0, 0, 0] > RLMIN and in_lu[0, 0, 1] > EPSEC: - solac += lude[0, 0, 0] / in_lu[0, 0, 1] - # diagnostic temperature split - convsrce_ql = foealfa * lude - convsrce_qi = (1 - foealfa) * lude - solqa_ql_ql += convsrce_ql - solqa_qi_qi += convsrce_qi - else: - lude = 0.0 - - # convective snow detrainment source - if in_convection_on[0, 0]: - solqa_qs_qs += in_snde[0, 0, 0] * dtgdp - else: - lude = in_lude[0, 0, 0] - - # *** 3.3: subsidence compensating convective updraughts - # --- subsidence source from layer above and evaporation of cloud within the layer - if tmp_klevel[0] > NCLDTOP - 1: - mf = max(0.0, (in_mfu + in_mfd) * dtgdp) - acust = mf * anew[0, 0, -1] - - if __INLINED(not FALLQL and PHASEQL > 0): - lcust_ql = mf * qln[0, 0, -1] - # record total flux for enthalpy budget - convsrce_ql += lcust_ql - - if __INLINED(not FALLQI and PHASEQI > 0): - lcust_qi = mf * qin[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qi += lcust_qi - - if __INLINED(not FALLQR and PHASEQR > 0): - lcust_qr = mf * qrn[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qr += lcust_qr - - if __INLINED(not FALLQS and PHASEQS > 0): - lcust_qs = mf * qsn[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qs += lcust_qs - - if __INLINED(not FALLQV and PHASEQV > 0): - lcust_qv = mf * qvn[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qv += lcust_qv - - # work out how much liquid evaporates at arrival point - dtdp = RDCP * 0.5 * (t[0, 0, -1] + t[0, 0, 0]) / in_aph[0, 0, 0] - dtforc = dtdp[0, 0, 0] * (in_ap[0, 0, 0] - in_ap[0, 0, -1]) - dqs = anew[0, 0, -1] * dtforc * dqsmixdt - - if __INLINED(not FALLQL and PHASEQL > 0): - lfinal = max(0.0, lcust_ql - dqs) - evap = min(lcust_ql - lfinal, evaplimmix) - lfinal = lcust_ql - evap - lfinalsum += lfinal - solqa_ql_ql += lcust_ql - solqa_qv_ql += evap - solqa_ql_qv -= evap - - if __INLINED(not FALLQI and PHASEQI > 0): - lfinal = max(0.0, lcust_qi - dqs) - evap = min(lcust_qi - lfinal, evaplimmix) - lfinal = lcust_qi - evap - lfinalsum += lfinal - solqa_qi_qi += lcust_qi - solqa_qv_qi += evap - solqa_qi_qv -= evap - - if __INLINED(not FALLQR and PHASEQR > 0): - lfinal = max(0.0, lcust_qr - dqs) - evap = min(lcust_qr - lfinal, evaplimmix) - lfinal = lcust_qr - evap - lfinalsum += lfinal - solqa_qr_qr += lcust_qr - solqa_qv_qr += evap - solqa_qr_qv -= evap - - if __INLINED(not FALLQS and PHASEQS > 0): - lfinal = max(0.0, lcust_qs - dqs) - evap = min(lcust_qs - lfinal, evaplimmix) - lfinal = lcust_qs - evap - lfinalsum += lfinal - solqa_qs_qs += lcust_qs - solqa_qv_qs += evap - solqa_qs_qv -= evap - - if __INLINED(not FALLQV and PHASEQV > 0): - lfinal = max(0.0, lcust_qv - dqs) - evap = min(lcust_qv - lfinal, evaplimmix) - lfinal = lcust_qv - evap - lfinalsum += lfinal - solqa_qv_qv += lcust_qv - - # reset the cloud contribution if no cloud water survives to this level - if lfinalsum < EPSEC: - acust = 0.0 - solac += acust - - # --- subsidence sink of cloud to the layer below - if tmp_klevel[0] < NLEV - 1: - mfdn = max(0.0, (in_mfu[0, 0, 1] + in_mfd[0, 0, 1]) * dtgdp) - solab += mfdn - solqb_ql_ql += mfdn - solqb_qi_qi += mfdn - - # record sink for cloud budget and enthalpy budget diagnostics - convsink_ql = mfdn - convsink_qi = mfdn - - # *** 3.4: erosion of clouds by turbulent mixing - # --- define turbulent erosion rate - ldifdt = RCLDIFF * dt - if in_convection_type[0, 0] > 0 and lude > EPSEC: - ldifdt *= RCLDIFF_CONVI - - if li > EPSEC: - # calculate environmental humidity - e = ldifdt * max(qsmix - qv, 0.0) - leros = min(min(a * e, evaplimmix), li) - aeros = leros / licld - - # erosion is -ve linear in L, A - solac -= aeros - solqa_qv_ql += liqfrac * leros - solqa_ql_qv -= liqfrac * leros - solqa_qv_qi += icefrac * leros - solqa_qi_qv -= icefrac * leros - - # *** 3.5: condensation/evaporation due to dqsat/dT - dtdp = RDCP * t / in_ap[0, 0, 0] - dpmxdt = dp / dt - mfdn = in_mfu[0, 0, 1] + in_mfd[0, 0, 1] if tmp_klevel[0] < NLEV - 1 else 0.0 - wtot = in_w[0, 0, 0] + 0.5 * RG * (in_mfu[0, 0, 0] + in_mfd[0, 0, 0] + mfdn) - wtot = min(dpmxdt, max(-dpmxdt, wtot)) - zzdt = in_hrsw[0, 0, 0] + in_hrlw[0, 0, 0] - dtdiab = min(dpmxdt * dtdp, max(-dpmxdt * dtdp, zzdt)) * dt + RALFDCP * ldefr - dtforc = dtdp * wtot * dt + dtdiab - qold = qsmix - told = t - t = max(t + dtforc, 160.0) - - qsmix, t = f_cuadjtq(in_ap, qsmix, t) - - dqs = qsmix - qold - qsmix = qold - t = told - - # ***: 3.5a: evaporation of clouds - if dqs > 0: - levap = min(min(a * min(dqs, licld), evaplimmix), max(qsmix - qv, 0.0)) - solqa_qv_ql += liqfrac * levap - solqa_ql_qv -= liqfrac * levap - solqa_qv_qi += icefrac * levap - solqa_qi_qv -= icefrac * levap - - # *** 3.5b: formation of clouds - # increase of cloud water in existing clouds - if a > EPSEC and dqs <= -RLMIN: - lcond1 = max(-dqs, 0.0) - - # old limiter - if a > 0.99: - cor = 1 / (1 - RETV * qsmix) - cdmax = (qv - qsmix) / (1 + cor * qsmix * f_foedem(t)) - else: - cdmax = (qv - a * qsmix) / a - - lcond1 = a * max(min(lcond1, cdmax), 0.0) - if lcond1 < RLMIN: - lcond1 = 0.0 - - # --- all increase goes into liquid unless so cold cloud homogeneously freezes - if t > RTHOMO: - solqa_ql_qv += lcond1 - solqa_qv_ql -= lcond1 - qlfg += lcond1 - else: - solqa_qi_qv += lcond1 - solqa_qv_qi -= lcond1 - qifg += lcond1 - - # generation of new clouds (da/dt > 0) - if dqs <= -RLMIN and a < 1 - EPSEC: - # --- critical relative humidity - rhc = RAMID - sigk = in_ap[0, 0, 0] / tmp_aph_s[0, 0] - if sigk > 0.8: - rhc += (1 - RAMID) * ((sigk - 0.8) / 0.2) ** 2 - - # --- supersaturation options - if __INLINED(NSSOPT == 0): - # no scheme - qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a)) - elif __INLINED(NSSOPT == 1): - # Tompkins - qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a)) - elif __INLINED(NSSOPT == 2): - # Lohmann and Karcher - qe = qv - else: - # Gierens - qe = qv + li - - if t >= RTT or NSSOPT == 0: - # no ice supersaturation allowed - fac = 1.0 - else: - # ice supersaturation - fac = fokoop - - if qe >= rhc * qsice * fac and qe < qsice * fac: - acond = -(1 - a) * fac * dqs / max(2 * (fac * qsice - qe), EPSEC) - acond = min(acond, 1 - a) - - # linear term - lcond2 = -fac * dqs * 0.5 * acond - - # new limiter formulation - zdl = 2 * (fac * qsice - qe) / max(EPSEC, 1 - a) - expr2 = fac * dqs - if expr2 < -zdl: - lcondlim = (a - 1) * expr2 - fac * qsice + qv - lcond2 = min(lcond2, lcondlim) - lcond2 = max(lcond2, 0.0) - - expr10 = 1 - a - if lcond2 < RLMIN or expr10 < EPSEC: - lcond2 = 0.0 - acond = 0.0 - if lcond2 == 0.0: - acond = 0.0 - - # large-scale generation is linear in A and linear in L - solac += acond - - # --- all increase goes into liquid unless so cold cloud homogeneously freezes - if t > RTHOMO: - solqa_ql_qv += lcond2 - solqa_qv_ql -= lcond2 - qlfg += lcond2 - else: # homogeneous freezing - solqa_qi_qv += lcond2 - solqa_qv_qi -= lcond2 - qifg += lcond2 - - # *** 3.6: growth of ice by vapour deposition - if __INLINED(DEPICE == 1): # --- ice deposition following Rotstayn et al. (2001) - # --- calculate distance from cloud top - if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF: - tmp_cldtopdist[0, 0] = 0.0 - else: - tmp_cldtopdist[0, 0] += dp / (rho * RG) - - # --- only treat depositional growth if liquid present - if t < RTT and qlfg > RLMIN: - vpice = f_foeeice(t) * RV / RD - vpliq = vpice * fokoop - icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639) - - # --- 0.024 is conductivity of air - # --- 8.8 = 700 ** (1/3) = density of ice to the third - add = RLSTT * (RLSTT / (RV * t) - 1) / (0.024 * t) - bdd = RV * t * in_ap[0, 0, 0] / (2.21 * vpice) - cvds = ( - 7.8 - * (icenuclei / rho) ** 0.666 - * (vpliq - vpice) - / (8.87 * (add + bdd) * vpice) - ) - - # --- RICEINIT = 1e-12 is initial mass of ice particle - ice0 = max(icecld, icenuclei * RICEINIT / rho) - - # --- new value of ice - inew = (0.666 * cvds * dt + ice0**0.666) ** 1.5 - - # --- grid-mean deposition rate - depos = max(a * (inew - ice0), 0.0) - - # --- limit deposition to liquid water amount - depos = min(depos, qlfg) - - # --- at top of cloud, reduce deposition rate near cloud top - infactor = min(icenuclei / 15000, 1.0) - depos *= min( - infactor - + (1 - infactor) - * (RDEPLIQREFRATE + tmp_cldtopdist[0, 0] / RDEPLIQREFDEPTH), - 1.0, - ) - - # --- add to matrix - solqa_qi_ql += depos - solqa_ql_qi -= depos - qifg += depos - qlfg -= depos - elif __INLINED(DEPICE == 2): # --- ice deposition assuming ice PSD - # --- calculate distance from cloud top - if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF: - tmp_cldtopdist = 0.0 - else: - tmp_cldtopdist += dp / (rho * RG) - - # --- only treat depositional growth if liquid present - if t < RTT and qlfg > RLMIN: - vpice = f_foeeice(t) * RV / RD - vpliq = vpice * fokoop - icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639) - - # --- RICEINIT=1e-12 is the initial mass of ice particle - ice0 = max(icecld, icenuclei * RICEINIT / rho) - - # particle size distribution - tcg = 1 - facx1i = 1 - apb = RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap * RCL_APB3 * t**3 - corrfac = (1 / rho) ** 0.5 - corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120) - pr02 = rho * ice0 * RCL_CONST1I / (tcg * facx1i) - term1 = ( - (vpliq - vpice) - * t**2 - * vpice - * corrfac2 - * tcg - * RCL_CONST2I - * facx1i - / (rho * apb * vpice) - ) - term2 = ( - 0.65 * RCL_CONST6I * pr02**RCL_CONST4I - + RCL_CONST3I - * corrfac**0.5 - * rho**0.5 - * pr02**RCL_CONST5I - / corrfac2**0.5 - ) - depos = max(a * term1 * term2 * dt, 0.0) - - # --- limit deposition to liquid water amount - depos = min(depos, qlfg) - - # --- at top of cloud, reduce deposition rate near cloud top to account for - # --- small scale turbulent processes - infactor = min(icenuclei / 15000, 1.0) - depos *= min( - infactor - + (1 - infactor) * (RDEPLIQREFRATE + tmp_cldtopdist / RDEPLIQREFDEPTH), - 1.0, - ) - - # --- add to matrix - solqa_qi_ql += depos - solqa_ql_qi -= depos - qifg += depos - qlfg -= depos - - # === 4: precipitation processes - # --- revise in-cloud condensate amount - tmpa = 1 / max(a, EPSEC) - liqcld = qlfg * tmpa - icecld = qifg * tmpa - - # *** 4.1a: sedimentation/falling of ql - if __INLINED(FALLQL): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_ql = pfplsl[0, 0, -1] * dtgdp - solqa_ql_ql += fallsrce_ql - qlfg += fallsrce_ql - # use first guess precip - qpretot += qlfg - - # --- sink to next layer, constant fall speed - fallsink_ql = dtgdp * VQL * rho - else: - fallsink_ql = 0.0 - - # *** 4.1b: sedimentation/falling of qi - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qi = pfplsi[0, 0, -1] * dtgdp - solqa_qi_qi += fallsrce_qi - qifg += fallsrce_qi - # use first guess precip - qpretot += qifg - - # --- sink to next layer, constant fall speed - if __INLINED(LAERICESED): - vqi = 0.002 * in_re_ice[0, 0, 0] - else: - vqi = VQI - fallsink_qi = dtgdp * vqi * rho - - # *** 4.1c: sedimentation/falling of qr - if __INLINED(FALLQR): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qr = pfplsr[0, 0, -1] * dtgdp - solqa_qr_qr += fallsrce_qr - qrfg += fallsrce_qr - # use first guess precip - qpretot += qrfg - - # --- sink to next layer, constant fall speed - fallsink_qr = dtgdp * VQR * rho - else: - fallsink_qr = 0.0 - - # *** 4.1d: sedimentation/falling of qs - if __INLINED(FALLQS): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qs = pfplss[0, 0, -1] * dtgdp - solqa_qs_qs += fallsrce_qs - qsfg += fallsrce_qs - # use first guess precip - qpretot += qsfg - - # --- sink to next layer, constant fall speed - fallsink_qs = dtgdp * VQS * rho - else: - fallsink_qs = 0.0 - - # *** 4.1e: sedimentation/falling of qv - if __INLINED(FALLQV): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qv = pfplsv[0, 0, -1] * dtgdp - solqa_qv_qv += fallsrce_qv - qvfg += fallsrce_qv - # use first guess precip - qpretot += qvfg - - # --- sink to next layer, constant fall speed - fallsink_qv = dtgdp * VQV * rho - else: - fallsink_qv = 0.0 - - # --- precip cover overlap using RAX-RAN Overlap - if qpretot > EPSEC: - tmp_covptot[0, 0] = 1 - ( - (1 - tmp_covptot[0, 0]) - * (1 - max(a[0, 0, 0], a[0, 0, -1])) - / (1 - min(a[0, 0, -1], 1 - 1e-6)) - ) - tmp_covptot[0, 0] = max(tmp_covptot[0, 0], RCOVPMIN) - covpclr = max(0.0, tmp_covptot[0, 0] - a) - raincld = qrfg / tmp_covptot[0, 0] - snowcld = qsfg / tmp_covptot[0, 0] - tmp_covpmax[0, 0] = max(tmp_covptot[0, 0], tmp_covpmax[0, 0]) - else: - raincld = 0.0 - snowcld = 0.0 - tmp_covptot[0, 0] = 0.0 - covpclr = 0.0 - tmp_covpmax[0, 0] = 0.0 - - # *** 4.2a: autoconversion to snow - if t <= RTT: - # --- snow autoconversion rate follow Lin et al. 1983 - if icecld > EPSEC: - co = dt * RSNOWLIN1 * exp(RSNOWLIN2 * (t - RTT)) - - if __INLINED(LAERICEAUTO): - lcrit = in_icrit_aer[0, 0, 0] - co *= (RNICE / in_nice[0, 0, 0]) ** 0.333 - else: - lcrit = RLCRITSNOW - - snowaut = co * (1 - exp(-((icecld / lcrit) ** 2))) - solqb_qs_qi += snowaut - - # *** 4.2b: autoconversion warm clouds - if liqcld > EPSEC: - if __INLINED(WARMRAIN == 1): # --- warm-rain process follow Sundqvist (1989) - co = RKCONV * dt - - if __INLINED(LAERLIQAUTOLSP): - lcrit = in_lcrit_aer[0, 0, 0] - co *= (RCCN / in_ccn[0, 0, 0]) ** 0.333 - else: - lcrit = RCLCRIT_LAND if in_lsm[0, 0] > 0.5 else RCLCRIT_SEA - - # --- parameters for cloud collection by rain and snow - precip = (pfplss[0, 0, -1] + pfplsr[0, 0, -1]) / max(EPSEC, tmp_covptot[0, 0]) - cfpr = 1 + RPRC1 * sqrt(max(precip, 0.0)) - if __INLINED(LAERLIQCOLL): - cfpr *= (RCCN / in_ccn[0, 0, 0]) ** 0.333 - - co *= cfpr - lcrit /= max(cfpr, EPSEC) - - rainaut = co - if liqcld / lcrit < 20: - rainaut *= 1 - exp(-((liqcld / lcrit) ** 2)) - - # rain freezes instantly - if t <= RTT: - solqb_qs_ql += rainaut - else: - solqb_qr_ql += rainaut - elif __INLINED( - WARMRAIN == 2 - ): # --- warm-rain process follow Khairoutdinov and Kogan (2000) - if in_lsm[0, 0] > 0.5: - const = RCL_KK_cloud_num_land - lcrit = RCLCRIT_LAND - else: - const = RCL_KK_cloud_num_sea - lcrit = RCLCRIT_SEA - - if liqcld > lcrit: - rainaut = ( - 1.5 * a * dt * RCL_KKAau * liqcld**RCL_KKBauq * const**RCL_KKBaun - ) - rainaut = min(rainaut, qlfg) - if rainaut < EPSEC: - rainaut = 0.0 - rainacc = 2 * a * dt * RCL_KKAac * (liqcld * raincld) ** RCL_KKBac - rainacc = min(rainacc, qlfg) - if rainacc < EPSEC: - rainacc = 0.0 - else: - rainaut = 0.0 - rainacc = 0.0 - - expr3 = rainaut + rainacc - if t <= RTT: - solqa_qs_ql += expr3 - solqa_ql_qs -= expr3 - else: - solqa_qr_ql += expr3 - solqa_ql_qr -= expr3 - - # --- riming - collection of cloud liquid drops by snow and ice - if __INLINED(WARMRAIN > 1): - if t <= RTT and liqcld > EPSEC: - # fallspeed air density correction - fallcorr = (RDENSREF / rho) ** 0.4 - - # --- riming of snow by cloud water - implicit in lwc - if snowcld > EPSEC and tmp_covptot[0, 0] > 0.01: - # calculate riming term - snowrime = ( - 0.3 - * tmp_covptot[0, 0] - * dt - * RCL_CONST7S - * fallcorr - * (rho * snowcld * RCL_CONST1S) ** RCL_CONST8S - ) - - # limit snow riming term - snowrime = min(snowrime, 1.0) - - solqb_qs_ql += snowrime - - # *** 4.3a: melting of snow and ice - icetot = qifg + qsfg - meltmax = 0.0 - - # if there are frozen hydrometeors present and dry-bulb temperature > 0degC - if icetot > EPSEC and t > RTT: - # calculate subsaturation - subsat = max(qsice - qv, 0.0) - - # calculate difference between dry-bulb and the temperature at which the wet-buld=0degC - # using and approx - tdmtw0 = t - RTT - subsat * (TW1 + TW2 * (in_ap[0, 0, 0] - TW3) - TW4 * (t - TW5)) - - # ensure cons1 is positive - cons1 = abs(dt * (1 + 0.5 * tdmtw0) / RTAUMEL) - meltmax = max(tdmtw0 * cons1 * RLDCP, 0.0) - - if meltmax > EPSEC and icetot > EPSEC: - # apply melting in same proportion as frozen hydrometeor fractions - alfa_qi = qifg / icetot - melt_qi = min(qifg, alfa_qi * meltmax) - alfa_qs = qsfg / icetot - melt_qs = min(qsfg, alfa_qs * meltmax) - - # needed in first guess - qifg -= melt_qi - qrfg += melt_qi + melt_qs - qsfg -= melt_qs - solqa_qi_qr -= melt_qi - solqa_qr_qi += melt_qi - solqa_qr_qs += melt_qs - solqa_qs_qr -= melt_qs - - # *** 4.3b: freezing of rain - if qr > EPSEC: - if t[0, 0, 0] <= RTT and t[0, 0, -1] > RTT: - # base of melting layer/top of refreezing layer so store rain/snow fraction for - # precip type diagnosis - qpretot = max(qs + qr, EPSEC) - out_rainfrac_toprfz[0, 0] = qr / qpretot - tmp_rainliq[0, 0] = out_rainfrac_toprfz[0, 0] > 0.8 - - if t < RTT: - if tmp_rainliq[0, 0]: - # majority of raindrops completely melted - # slope of rain partical size distribution - lambda_ = (RCL_FAC1 / (rho * qr)) ** RCL_FAC2 - - # calculate freezing rate based on Bigg (1953) and Wisner (1972) - temp = RCL_FZRAB * (t - RTT) - frz = dt * (RCL_CONST5R / rho) * (exp(temp) - 1) * lambda_**RCL_CONST6R - frzmax = max(frz, 0.0) - else: - # majority of raindrops only partially melted - cons1 = abs(dt * (1 + 0.5 * (RTT - t)) / RTAUMEL) - frzmax = max((RTT - t) * cons1 * RLDCP, 0.0) - - if frzmax > EPSEC: - frz = min(qr, frzmax) - solqa_qs_qr += frz - solqa_qr_qs -= frz - - # *** 4.3c: freezing of liquid - frzmax = max((RTHOMO - t) * RLDCP, 0.0) - if frzmax > EPSEC and qlfg > EPSEC: - frz = min(qlfg, frzmax) - solqa_qi_ql += frz - solqa_ql_qi -= frz - - # *** 4.4: evaporation of rain/snow - if __INLINED(EVAPRAIN == 1): # --- rain evaporation scheme from Sundquist - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - qe = (qv - a * qsliq) / max(EPSEC, 1 - a) - - # --- humidity in moistest covpclr part of domain - qe = max(0.0, min(qe, qsliq)) - lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq - if lo1: - # note: preclr is a rain flux - expr4 = tmp_covptot[0, 0] * dtgdp - expr5 = max(abs(expr4), EPSILON) - expr6 = expr5 if expr4 > 0 else -expr5 - preclr = qrfg * covpclr / expr6 - - # --- actual microphysics formula in beta - beta1 = ( - sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0]) - / RVRFACTOR - * preclr - / max(covpclr, EPSEC) - ) - beta = RG * RPECONS * 0.5 * beta1**0.5777 - denom = 1 + beta * dt * corqsliq - dpr = covpclr * beta * (qsliq - qe) / denom * dp / RG - dpevap = dpr * dtgdp - - # --- add evaporation term to explicit sink - evap = min(dpevap, qrfg) - solqa_qv_qr += evap - solqa_qr_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, - tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg), - ) - - # update fg field - qrfg -= evap - elif __INLINED( - EVAPRAIN == 2 - ): # --- rain evaporation scheme based on Abel and Boutle (2013) - # --- calculate relative humidity limit for rain evaporation - # limit rh for rain evaporation dependent on precipitation fraction - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - - # further limit rh for rain evaporation to 80% - rh = min(0.8, rh) - - qe = max(0.0, min(qv, qsliq)) - lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq - if lo1: - # --- Abel and Boutle (2012) evaporation - # calculate local precipitation (kg/kg) - preclr = qrfg / tmp_covptot[0, 0] - - # fallspeed air density correction - fallcorr = (RDENSREF / rho) ** 0.4 - - # saturation vapor pressure with respect to liquid phase - esatliq = RV / RD * f_foeeliq(t) - - # slope of particle size distribution - lambda_ = (RCL_FAC1 / (rho * preclr)) ** RCL_FAC2 - - evap_denom = ( - RCL_CDENOM1 * esatliq - - RCL_CDENOM2 * t * esatliq - + RCL_CDENOM3 * t**3 * in_ap[0, 0, 0] - ) - - # temperature dependent conductivity - corr2 = (t / 273) ** 1.5 * 393 / (t + 120) - - subsat = max(rh * qsliq - qe, 0.0) - beta = ( - 0.5 - / qsliq - * t**2 - * esatliq - * RCL_CONST1R - * (corr2 / evap_denom) - * ( - 0.78 / lambda_**RCL_CONST4R - + RCL_CONST2R - * (rho * fallcorr) ** 0.5 - / (corr2**0.5 * lambda_**RCL_CONST3R) - ) - ) - denom = 1 + beta * dt - dpevap = covpclr * beta * dt * subsat / denom - - # --- add evaporation term to explicit sink - evap = min(dpevap, qrfg) - solqa_qv_qr += evap - solqa_qr_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, - tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg), - ) - - # update fg field - qrfg -= evap - - # *** 4.5: evaporation of snow - if __INLINED(EVAPSNOW == 1): - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - qe = (qv - a * qsice) / max(EPSEC, 1 - a) - - # --- humidity in moistest covpclr part of domain - qe = max(0.0, min(qe, qsice)) - lo1 = covpclr > EPSEC and qsfg > EPSEC and qe < rh * qsice - if lo1: - expr7 = tmp_covptot[0, 0] * dtgdp - expr8 = max(abs(expr7), EPSILON) - expr9 = expr8 if expr7 > 0 else -expr8 - preclr = qsfg * covpclr / expr9 - - # --- actual microphysics formula in beta - beta1 = ( - sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0]) - / RVRFACTOR - * preclr - / max(covpclr, EPSEC) - ) - beta = RG * RPECONS * beta1**0.5777 - denom = 1 + beta * dt * corqsice - dpr = covpclr * beta * (qsice - qe) / denom * dp / RG - dpevap = dpr * dtgdp - - # --- add evaporation term to explicit sink - evap = min(dpevap, qsfg) - solqa_qv_qs += evap - solqa_qs_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, - tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qsfg), - ) - - # update first guess field - qsfg -= evap - elif __INLINED(EVAPSNOW == 2): - # --- calculate relative humidity limit for snow evaporation - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - qe = (qv - a * qsice) / max(EPSEC, 1 - a) - - # --- humidity in moistest covpclr part of domain - qe = max(0.0, min(qe, qsice)) - lo1 = covpclr > EPSEC and qs > EPSEC and qe < rh * qsice - if lo1: - # calculate local precipitation (kg/kg) - preclr = qsfg / tmp_covptot[0, 0] - vpice = f_foeeice(t) * RV / RD - - # particle size distribution - tcg = 1.0 - facx1s = 1.0 - apb = ( - RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap[0, 0, 0] * RCL_APB3 * t**3 - ) - corrfac = (1 / rho) ** 0.5 - corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120) - pr02 = rho * preclr * RCL_CONST1S / (tcg * facx1s) - term1 = ( - (qsice - qe) - * t**2 - * vpice - * corrfac2 - * tcg - * RCL_CONST2S - * facx1s - / (rho * apb * qsice) - ) - term2 = ( - 0.65 * RCL_CONST6S * pr02**RCL_CONST4S - + RCL_CONST3S - * corrfac**0.5 - * rho**0.5 - * pr02**RCL_CONST5S - / corrfac2**0.5 - ) - dpevap = max(covpclr * term1 * term2 * dt, 0.0) - - # --- limit evaporation to snow amount - evap = min(min(dpevap, evaplimice), qs) - solqa_qv_qs += evap - solqa_qs_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qs) - ) - - # update first guess field - qsfg -= evap - - # --- evaporate small precipitation amounts - if __INLINED(FALLQL): - if qlfg < RLMIN: - solqa_qv_ql += qlfg - solqa_ql_qv -= qlfg - if __INLINED(FALLQI): - if qifg < RLMIN: - solqa_qv_qi += qifg - solqa_qi_qv -= qifg - if __INLINED(FALLQR): - if qrfg < RLMIN: - solqa_qv_qr += qrfg - solqa_qr_qv -= qrfg - if __INLINED(FALLQS): - if qsfg < RLMIN: - solqa_qv_qs += qsfg - solqa_qs_qv -= qsfg - - # === 5: solvers for A and L - # *** 5.1: solver for cloud cover - anew = min((a + solac) / (1 + solab), 1.0) - if anew < RAMIN: - anew = 0.0 - da = anew - a0 - - # *** 5.2: solver for the microphysics - # --- collect sink terms and mark - sinksum_ql = -(solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv) - sinksum_qi = -(solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv) - sinksum_qr = -(solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv) - sinksum_qs = -(solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv) - sinksum_qv = -(solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv) - - # --- calculate overshoot and scaling factor - max_ql = max(ql, EPSEC) - rat_ql = max(sinksum_ql, max_ql) - ratio_ql = max_ql / rat_ql - max_qi = max(qi, EPSEC) - rat_qi = max(sinksum_qi, max_qi) - ratio_qi = max_qi / rat_qi - max_qr = max(qr, EPSEC) - rat_qr = max(sinksum_qr, max_qr) - ratio_qr = max_qr / rat_qr - max_qs = max(qs, EPSEC) - rat_qs = max(sinksum_qs, max_qs) - ratio_qs = max_qs / rat_qs - max_qv = max(qv, EPSEC) - rat_qv = max(sinksum_qv, max_qv) - ratio_qv = max_qv / rat_qv - - # --- now sort ratio to find out which species run out first - order_ql, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_ql, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qi, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qi, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qr, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qr, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qs, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qs, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qv, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qv, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - - # scale the sink terms, in the correct order, recalculating the scale factor each time - sinksum_ql = 0.0 - sinksum_qi = 0.0 - sinksum_qr = 0.0 - sinksum_qs = 0.0 - sinksum_qv = 0.0 - - # --- recalculate sum and scaling factor, and then scale - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_ql, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qi, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qr, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qs, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qv, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - - # *** 5.2.2: solver - # --- set the lhs of equation - # --- diagonals: microphysical sink terms + transport - lhs_ql_ql = ( - 1 - + fallsink_ql - + solqb_qv_ql - + solqb_ql_ql - + solqb_qi_ql - + solqb_qr_ql - + solqb_qs_ql - ) - lhs_qi_qi = ( - 1 - + fallsink_qi - + solqb_qv_qi - + solqb_ql_qi - + solqb_qi_qi - + solqb_qr_qi - + solqb_qs_qi - ) - lhs_qr_qr = ( - 1 - + fallsink_qr - + solqb_qv_qr - + solqb_ql_qr - + solqb_qi_qr - + solqb_qr_qr - + solqb_qs_qr - ) - lhs_qs_qs = ( - 1 - + fallsink_qs - + solqb_qv_qs - + solqb_ql_qs - + solqb_qi_qs - + solqb_qr_qs - + solqb_qs_qs - ) - lhs_qv_qv = ( - 1 - + fallsink_qv - + solqb_qv_qv - + solqb_ql_qv - + solqb_qi_qv - + solqb_qr_qv - + solqb_qs_qv - ) - - # --- non-diagonals: microphysical source terms - lhs_ql_qi = -solqb_ql_qi - lhs_ql_qr = -solqb_ql_qr - lhs_ql_qs = -solqb_ql_qs - lhs_ql_qv = -solqb_ql_qv - lhs_qi_ql = -solqb_qi_ql - lhs_qi_qr = -solqb_qi_qr - lhs_qi_qs = -solqb_qi_qs - lhs_qi_qv = -solqb_qi_qv - lhs_qr_ql = -solqb_qr_ql - lhs_qr_qi = -solqb_qr_qi - lhs_qr_qs = -solqb_qr_qs - lhs_qr_qv = -solqb_qr_qv - lhs_qs_ql = -solqb_qs_ql - lhs_qs_qi = -solqb_qs_qi - lhs_qs_qr = -solqb_qs_qr - lhs_qs_qv = -solqb_qs_qv - lhs_qv_ql = -solqb_qv_ql - lhs_qv_qi = -solqb_qv_qi - lhs_qv_qr = -solqb_qv_qr - lhs_qv_qs = -solqb_qv_qs - - # --- set the rhs of equation - # --- sum the explicit source and sink - qln = ql + solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv - qin = qi + solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv - qrn = qr + solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv - qsn = qs + solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv - qvn = qv + solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv - - # --- solve by LU decomposition - # non pivoting recursive factorization - lhs_qi_ql /= lhs_ql_ql # JN=1, JM=2 - lhs_qi_qi -= lhs_qi_ql * lhs_ql_qi # JN=1, JM=2, IK=2 - lhs_qi_qr -= lhs_qi_ql * lhs_ql_qr # JN=1, JM=2, IK=3 - lhs_qi_qs -= lhs_qi_ql * lhs_ql_qs # JN=1, JM=2, IK=4 - lhs_qi_qv -= lhs_qi_ql * lhs_ql_qv # JN=1, JM=2, IK=0 - lhs_qr_ql /= lhs_ql_ql # JN=1, JM=3 - lhs_qr_qi -= lhs_qr_ql * lhs_ql_qi # JN=1, JM=3, IK=2 - lhs_qr_qr -= lhs_qr_ql * lhs_ql_qr # JN=1, JM=3, IK=3 - lhs_qr_qs -= lhs_qr_ql * lhs_ql_qs # JN=1, JM=3, IK=4 - lhs_qr_qv -= lhs_qr_ql * lhs_ql_qv # JN=1, JM=3, IK=0 - lhs_qs_ql /= lhs_ql_ql # JN=1, JM=4 - lhs_qs_qi -= lhs_qs_ql * lhs_ql_qi # JN=1, JM=4, IK=2 - lhs_qs_qr -= lhs_qs_ql * lhs_ql_qr # JN=1, JM=4, IK=3 - lhs_qs_qs -= lhs_qs_ql * lhs_ql_qs # JN=1, JM=4, IK=4 - lhs_qs_qv -= lhs_qs_ql * lhs_ql_qv # JN=1, JM=4, IK=0 - lhs_qv_ql /= lhs_ql_ql # JN=1, JM=0 - lhs_qv_qi -= lhs_qv_ql * lhs_ql_qi # JN=1, JM=0, IK=2 - lhs_qv_qr -= lhs_qv_ql * lhs_ql_qr # JN=1, JM=0, IK=3 - lhs_qv_qs -= lhs_qv_ql * lhs_ql_qs # JN=1, JM=0, IK=4 - lhs_qv_qv -= lhs_qv_ql * lhs_ql_qv # JN=1, JM=0, IK=0 - lhs_qr_qi /= lhs_qi_qi # JN=2, JM=3 - lhs_qr_qr -= lhs_qr_qi * lhs_qi_qr # JN=2, JM=3, IK=3 - lhs_qr_qs -= lhs_qr_qi * lhs_qi_qs # JN=2, JM=3, IK=4 - lhs_qr_qv -= lhs_qr_qi * lhs_qi_qv # JN=2, JM=3, IK=0 - lhs_qs_qi /= lhs_qi_qi # JN=2, JM=4 - lhs_qs_qr -= lhs_qs_qi * lhs_qi_qr # JN=2, JM=4, IK=3 - lhs_qs_qs -= lhs_qs_qi * lhs_qi_qs # JN=2, JM=4, IK=4 - lhs_qs_qv -= lhs_qs_qi * lhs_qi_qv # JN=2, JM=4, IK=0 - lhs_qv_qi /= lhs_qi_qi # JN=2, JM=0 - lhs_qv_qr -= lhs_qv_qi * lhs_qi_qr # JN=2, JM=0, IK=3 - lhs_qv_qs -= lhs_qv_qi * lhs_qi_qs # JN=2, JM=0, IK=4 - lhs_qv_qv -= lhs_qv_qi * lhs_qi_qv # JN=2, JM=0, IK=0 - lhs_qs_qr /= lhs_qr_qr # JN=3, JM=4 - lhs_qs_qs -= lhs_qs_qr * lhs_qr_qs # JN=3, JM=4, IK=4 - lhs_qs_qv -= lhs_qs_qr * lhs_qr_qv # JN=3, JM=4, IK=0 - lhs_qv_qr /= lhs_qr_qr # JN=3, JM=0 - lhs_qv_qs -= lhs_qv_qr * lhs_qr_qs # JN=3, JM=0, IK=4 - lhs_qv_qv -= lhs_qv_qr * lhs_qr_qv # JN=3, JM=0, IK=0 - lhs_qv_qs /= lhs_qs_qs # JN=4, JM=0 - lhs_qv_qv -= lhs_qv_qs * lhs_qs_qv # JN=4, JM=0, IK=0 - - # backsubstitution: step 1 - qin -= lhs_qi_ql * qln - qrn -= lhs_qr_ql * qln + lhs_qr_qi * qin - qsn -= lhs_qs_ql * qln + lhs_qs_qi * qin + lhs_qs_qr * qrn - qvn -= lhs_qv_ql * qln + lhs_qv_qi * qin + lhs_qv_qr * qrn + lhs_qv_qs * qsn - - # backsubstitution: step 2 - qvn /= lhs_qv_qv - qsn -= lhs_qs_qv * qvn - qsn /= lhs_qs_qs - qrn -= lhs_qr_qs * qsn + lhs_qr_qv * qvn - qrn /= lhs_qr_qr - qin -= lhs_qi_qr * qrn + lhs_qi_qs * qsn + lhs_qi_qv * qvn - qin /= lhs_qi_qi - qln -= lhs_ql_qi * qin + lhs_ql_qr * qrn + lhs_ql_qs * qsn + lhs_ql_qv * qvn - qln /= lhs_ql_ql - - # ensure no small values (including negatives) remain in cloud variables - # nor precipitation rates - if qln < EPSEC: - qvn += qln - qln = 0.0 - if qin < EPSEC: - qvn += qin - qin = 0.0 - if qrn < EPSEC: - qvn += qrn - qrn = 0.0 - if qsn < EPSEC: - qvn += qsn - qsn = 0.0 - - # *** 5.3: precipitation/sedimentation fluxes to next level diagnostic precipitation fluxes - pfplsl = fallsink_ql * qln * rdtgdp - pfplsi = fallsink_qi * qin * rdtgdp - pfplsr = fallsink_qr * qrn * rdtgdp - pfplss = fallsink_qs * qsn * rdtgdp - pfplsv = fallsink_qv * qvn * rdtgdp - - # ensure precipitation fraction is zero if no precipitation - qpretot = pfplss + pfplsr - if qpretot < EPSEC: - tmp_covptot[0, 0] = 0.0 - - # === 6: update tendencies - # *** 6.1: temperature and CLV budgets - flux_ql = psupsatsrce_ql + convsrce_ql + fallsrce_ql - (fallsink_ql + convsink_ql) * qln - if __INLINED(PHASEQL == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (qln - ql - flux_ql) / dt - if __INLINED(PHASEQL == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (qln - ql - flux_ql) / dt - out_tnd_loc_ql[0, 0, 0] += (qln - ql0) / dt - - flux_qi = psupsatsrce_qi + convsrce_qi + fallsrce_qi - (fallsink_qi + convsink_qi) * qin - if __INLINED(PHASEQI == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (qin - qi - flux_qi) / dt - if __INLINED(PHASEQI == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (qin - qi - flux_qi) / dt - out_tnd_loc_qi[0, 0, 0] += (qin - qi0) / dt - - flux_qr = psupsatsrce_qr + convsrce_qr + fallsrce_qr - (fallsink_qr + convsink_qr) * qrn - if __INLINED(PHASEQR == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (qrn - qr - flux_qr) / dt - if __INLINED(PHASEQR == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (qrn - qr - flux_qr) / dt - out_tnd_loc_qr[0, 0, 0] += (qrn - qr0) / dt - - flux_qs = psupsatsrce_qs + convsrce_qs + fallsrce_qs - (fallsink_qs + convsink_qs) * qsn - if __INLINED(PHASEQS == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (qsn - qs - flux_qs) / dt - if __INLINED(PHASEQS == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (qsn - qs - flux_qs) / dt - out_tnd_loc_qs[0, 0, 0] += (qsn - qs0) / dt - - # *** 6.2: humidity budget - out_tnd_loc_qv[0, 0, 0] += (qvn - qv) / dt - - # *** 6.3: cloud cover - out_tnd_loc_a[0, 0, 0] += da / dt - - # --- copy precipitation fraction into output variable - out_covptot[0, 0, 0] = tmp_covptot[0, 0] - - # === 7: flux/diagnostics computations - with computation(FORWARD): - with interval(0, 1): - out_fplsl[0, 0, 0] = 0.0 - out_fplsn[0, 0, 0] = 0.0 - out_fhpsl[0, 0, 0] = 0.0 - out_fhpsn[0, 0, 0] = 0.0 - out_fsqlf[0, 0, 0] = 0.0 - out_fsqif[0, 0, 0] = 0.0 - out_fsqrf[0, 0, 0] = 0.0 - out_fsqsf[0, 0, 0] = 0.0 - out_fcqlng[0, 0, 0] = 0.0 - out_fcqnng[0, 0, 0] = 0.0 - out_fcqrng[0, 0, 0] = 0.0 - out_fcqsng[0, 0, 0] = 0.0 - out_fsqltur[0, 0, 0] = 0.0 - out_fsqitur[0, 0, 0] = 0.0 - with interval(1, None): - # --- copy general precip arrays back info PFP arrays for GRIB archiving - out_fplsl[0, 0, 0] = pfplsr[0, 0, -1] + pfplsl[0, 0, -1] - out_fplsn[0, 0, 0] = pfplss[0, 0, -1] + pfplsi[0, 0, -1] - - # --- enthalpy flux due to precipitation - out_fhpsl[0, 0, 0] = -RLVTT * out_fplsl[0, 0, 0] - out_fhpsn[0, 0, 0] = -RLSTT * out_fplsn[0, 0, 0] - - gdph_r = -(in_aph[0, 0, 0] - in_aph[0, 0, -1]) / (RG * dt) - out_fsqlf[0, 0, 0] = out_fsqlf[0, 0, -1] - out_fsqif[0, 0, 0] = out_fsqif[0, 0, -1] - out_fsqrf[0, 0, 0] = out_fsqlf[0, 0, -1] - out_fsqsf[0, 0, 0] = out_fsqif[0, 0, -1] - out_fcqlng[0, 0, 0] = out_fcqlng[0, 0, -1] - out_fcqnng[0, 0, 0] = out_fcqnng[0, 0, -1] - out_fcqrng[0, 0, 0] = out_fcqlng[0, 0, -1] - out_fcqsng[0, 0, 0] = out_fcqnng[0, 0, -1] - out_fsqltur[0, 0, 0] = out_fsqltur[0, 0, -1] - out_fsqitur[0, 0, 0] = out_fsqitur[0, 0, -1] - - # liquid, LS scheme minus detrainment - out_fsqlf[0, 0, 0] += ( - qln[0, 0, -1] - - ql0[0, 0, -1] - + in_vfl[0, 0, -1] * dt - - foealfa[0, 0, -1] * lude[0, 0, -1] - ) * gdph_r - # liquid, negative numbers - out_fcqlng[0, 0, 0] += lneg_ql[0, 0, -1] * gdph_r - # liquid, vertical diffusion - out_fsqltur[0, 0, 0] += in_vfl[0, 0, -1] * dt * gdph_r - - # rain, LS scheme - out_fsqrf[0, 0, 0] += (qrn[0, 0, -1] - qr0[0, 0, -1]) * gdph_r - # rain, negative numbers - out_fcqrng[0, 0, 0] += lneg_qr[0, 0, -1] * gdph_r - - # ice, LS scheme minus detrainment - out_fsqif[0, 0, 0] += ( - qin[0, 0, -1] - - qi0[0, 0, -1] - + in_vfi[0, 0, -1] * dt - - (1 - foealfa[0, 0, -1]) * lude[0, 0, -1] - ) * gdph_r - # ice, negative numbers - out_fcqnng[0, 0, 0] += lneg_qi[0, 0, -1] * gdph_r - # ice, vertical diffusion - out_fsqitur[0, 0, 0] += in_vfi[0, 0, -1] * dt * gdph_r - - # snow, LS scheme - out_fsqsf[0, 0, 0] += (qsn[0, 0, -1] - qs0[0, 0, -1]) * gdph_r - # snow, negative numbers - out_fcqsng[0, 0, 0] += lneg_qs[0, 0, -1] * gdph_r diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py deleted file mode 100644 index f081fa2a..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cloudsc_split.py +++ /dev/null @@ -1,2279 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations - -from gt4py.cartesian.gtscript import Field, IJ, K - -from cloudsc4py.framework.stencil import stencil_collection -from cloudsc4py.physics._stencils.cuadjtq import f_cuadjtq -from cloudsc4py.physics._stencils.fccld import f_fokoop -from cloudsc4py.physics._stencils.fcttre import ( - f_foealfa, - f_foedelta, - f_foedem, - f_foeeice, - f_foeeliq, - f_foeewm, - f_foeldcpm, -) -from cloudsc4py.physics._stencils.helpers import f_helper_0, f_helper_1 - - -@stencil_collection("cloudsc_tendencies") -def cloudsc_tendencies( - in_a: Field["float"], - in_ap: Field["float"], - in_aph: Field["float"], # staggered - in_ccn: Field["float"], - in_convection_on: Field[IJ, "bool"], - in_convection_type: Field[IJ, "int"], - in_hrlw: Field["float"], - in_hrsw: Field["float"], - in_icrit_aer: Field["float"], - in_lcrit_aer: Field["float"], - in_lsm: Field[IJ, "float"], - in_lu: Field["float"], - in_lude: Field["float"], - in_mfd: Field["float"], - in_mfu: Field["float"], - in_nice: Field["float"], - in_qi: Field["float"], - in_ql: Field["float"], - in_qr: Field["float"], - in_qs: Field["float"], - in_qv: Field["float"], - in_re_ice: Field["float"], - in_snde: Field["float"], - in_supsat: Field["float"], - in_t: Field["float"], - in_tnd_tmp_a: Field["float"], - in_tnd_tmp_qi: Field["float"], - in_tnd_tmp_ql: Field["float"], - in_tnd_tmp_qr: Field["float"], - in_tnd_tmp_qs: Field["float"], - in_tnd_tmp_qv: Field["float"], - in_tnd_tmp_t: Field["float"], - in_w: Field["float"], - out_covptot: Field["float"], - out_foealfa: Field["float"], - out_lneg_qi: Field["float"], - out_lneg_ql: Field["float"], - out_lneg_qr: Field["float"], - out_lneg_qs: Field["float"], - out_lude: Field["float"], - out_pfplsi: Field["float"], - out_pfplsl: Field["float"], - out_pfplsr: Field["float"], - out_pfplss: Field["float"], - out_qi0: Field["float"], - out_qin: Field["float"], - out_ql0: Field["float"], - out_qln: Field["float"], - out_qr0: Field["float"], - out_qrn: Field["float"], - out_qs0: Field["float"], - out_qsn: Field["float"], - out_rainfrac_toprfz: Field[IJ, "float"], - out_tnd_loc_a: Field["float"], - out_tnd_loc_qi: Field["float"], - out_tnd_loc_ql: Field["float"], - out_tnd_loc_qr: Field["float"], - out_tnd_loc_qs: Field["float"], - out_tnd_loc_qv: Field["float"], - out_tnd_loc_t: Field["float"], - tmp_aph_s: Field[IJ, "float"], - tmp_cldtopdist: Field[IJ, "float"], - tmp_covpmax: Field[IJ, "float"], - tmp_covptot: Field[IJ, "float"], - tmp_klevel: Field[K, "int"], - tmp_paphd: Field[IJ, "float"], - tmp_rainliq: Field[IJ, "bool"], - tmp_trpaus: Field[IJ, "float"], - *, - dt: "float", -): - from __externals__ import ( - DEPICE, - EPSEC, - EPSILON, - EVAPRAIN, - EVAPSNOW, - FALLQI, - FALLQL, - FALLQR, - FALLQS, - FALLQV, - LAERICEAUTO, - LAERICESED, - LAERLIQAUTOLSP, - LAERLIQCOLL, - NCLDTOP, - NLEV, - NSSOPT, - PHASEQI, - PHASEQL, - PHASEQR, - PHASEQS, - PHASEQV, - R4IES, - R4LES, - R5IES, - R5LES, - RALFDCP, - RALSDCP, - RALVDCP, - RAMID, - RAMIN, - RCCN, - RCL_APB1, - RCL_APB2, - RCL_APB3, - RCL_CDENOM1, - RCL_CDENOM2, - RCL_CDENOM3, - RCL_CONST1I, - RCL_CONST1R, - RCL_CONST1S, - RCL_CONST2I, - RCL_CONST2R, - RCL_CONST2S, - RCL_CONST3I, - RCL_CONST3R, - RCL_CONST3S, - RCL_CONST4I, - RCL_CONST4R, - RCL_CONST4S, - RCL_CONST5I, - RCL_CONST5R, - RCL_CONST5S, - RCL_CONST6I, - RCL_CONST6R, - RCL_CONST6S, - RCL_CONST7S, - RCL_CONST8S, - RCL_FAC1, - RCL_FAC2, - RCL_FZRAB, - RCL_KK_cloud_num_land, - RCL_KK_cloud_num_sea, - RCL_KKAac, - RCL_KKAau, - RCL_KKBac, - RCL_KKBaun, - RCL_KKBauq, - RCLCRIT_LAND, - RCLCRIT_SEA, - RCLDIFF, - RCLDIFF_CONVI, - RCLDTOPCF, - RCOVPMIN, - RD, - RDCP, - RDENSREF, - RDEPLIQREFDEPTH, - RDEPLIQREFRATE, - RETV, - RG, - RICEINIT, - RKCONV, - RKOOPTAU, - RLCRITSNOW, - RLDCP, - RLMIN, - RLSTT, - RLVTT, - RNICE, - RPECONS, - RPRC1, - RPRECRHMAX, - RSNOWLIN1, - RSNOWLIN2, - RTAUMEL, - RTHOMO, - RTT, - RV, - RVRFACTOR, - TW1, - TW2, - TW3, - TW4, - TW5, - VQI, - VQL, - VQR, - VQS, - VQV, - WARMRAIN, - ) - - with computation(FORWARD), interval(0, 1): - # zero arrays - out_rainfrac_toprfz[0, 0] = 0.0 - tmp_cldtopdist[0, 0] = 0.0 - tmp_covpmax[0, 0] = 0.0 - tmp_covptot[0, 0] = 0.0 - tmp_paphd[0, 0] = 0.0 - tmp_rainliq[0, 0] = True - tmp_trpaus[0, 0] = 0.0 - - with computation(FORWARD), interval(...): - # === 1: initial values for variables - # --- initialization of output tendencies - out_tnd_loc_t[0, 0, 0] = 0 - out_tnd_loc_a[0, 0, 0] = 0 - out_tnd_loc_ql[0, 0, 0] = 0 - out_tnd_loc_qr[0, 0, 0] = 0 - out_tnd_loc_qi[0, 0, 0] = 0 - out_tnd_loc_qs[0, 0, 0] = 0 - out_tnd_loc_qv[0, 0, 0] = 0 - - # --- non CLV initialization - t = in_t[0, 0, 0] + dt * in_tnd_tmp_t[0, 0, 0] - a = in_a[0, 0, 0] + dt * in_tnd_tmp_a[0, 0, 0] - a0 = a - - # --- initialization for CLV family - ql = in_ql[0, 0, 0] + dt * in_tnd_tmp_ql[0, 0, 0] - out_ql0[0, 0, 0] = ql - qi = in_qi[0, 0, 0] + dt * in_tnd_tmp_qi[0, 0, 0] - out_qi0[0, 0, 0] = qi - qr = in_qr[0, 0, 0] + dt * in_tnd_tmp_qr[0, 0, 0] - out_qr0[0, 0, 0] = qr - qs = in_qs[0, 0, 0] + dt * in_tnd_tmp_qs[0, 0, 0] - out_qs0[0, 0, 0] = qs - qv = in_qv[0, 0, 0] + dt * in_tnd_tmp_qv[0, 0, 0] - - # --- zero arrays - out_lneg_ql[0, 0, 0] = 0.0 - out_lneg_qi[0, 0, 0] = 0.0 - out_lneg_qr[0, 0, 0] = 0.0 - out_lneg_qs[0, 0, 0] = 0.0 - - # --- tidy up very small cloud cover or total cloud water - expr1 = ql + qi - if expr1 < RLMIN or a < RAMIN: - # evaporate small cloud liquid water amounts - out_lneg_ql[0, 0, 0] += ql - qadj = ql / dt - out_tnd_loc_qv[0, 0, 0] += qadj - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - qv += ql - ql = 0.0 - - # evaporate small cloud ice water amounts - out_lneg_qi[0, 0, 0] += qi - qadj = qi / dt - out_tnd_loc_qv[0, 0, 0] += qadj - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qi - qi = 0.0 - - # set cloud cover to zero - a = 0.0 - - # --- tidy up small CLV variables: ql - if ql < RLMIN: - out_lneg_ql[0, 0, 0] += ql - qadj = ql / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQL == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQL == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += ql - ql = 0.0 - - # --- tidy up small CLV variables: qi - if qi < RLMIN: - out_lneg_qi[0, 0, 0] += qi - qadj = qi / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQI == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQI == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qi - qi = 0.0 - - # --- tidy up small CLV variables: qr - if qr < RLMIN: - out_lneg_qr[0, 0, 0] += qr - qadj = qr / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQR == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQR == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qr - qr = 0.0 - - # --- tidy up small CLV variables: qs - if qs < RLMIN: - out_lneg_qs[0, 0, 0] += qs - qadj = qs / dt - out_tnd_loc_qv[0, 0, 0] += qadj - if __INLINED(PHASEQS == 1): - out_tnd_loc_t[0, 0, 0] -= RALVDCP * qadj - elif __INLINED(PHASEQS == 2): - out_tnd_loc_t[0, 0, 0] -= RALSDCP * qadj - qv += qs - qs = 0.0 - - # --- define saturation values - # --- old *diagnostic* mixed phase saturation - foealfa = f_foealfa(t) - out_foealfa[0, 0, 0] = foealfa - foeewmt = min(f_foeewm(t) / in_ap[0, 0, 0], 0.5) - qsmix = foeewmt / (1 - RETV * foeewmt) - - # --- ice saturation T < 273K - # --- liquid water saturation for T > 273K - alfa = f_foedelta(t) - foeew = min((alfa * f_foeeliq(t) + (1 - alfa) * f_foeeice(t)) / in_ap[0, 0, 0], 0.5) - qsice = foeew / (1 - RETV * foeew) - - # --- liquid water saturation - foeeliqt = min(f_foeeliq(t) / in_ap[0, 0, 0], 0.5) - qsliq = foeeliqt / (1 - RETV * foeeliqt) - - # --- ensure cloud fraction is between 0 and 1 - a = max(0, min(1, a)) - - # --- calculate liq/ice fractions (no longer a diagnostic relationship) - li = ql + qi - if li > RLMIN: - liqfrac = ql / li - icefrac = 1 - liqfrac - else: - liqfrac = 0.0 - icefrac = 0.0 - - # === 2: constants and parameters - # --- find tropopause level - with computation(FORWARD), interval(0, 1): - tmp_trpaus[0, 0] = 0.1 - tmp_paphd[0, 0] = 1 / tmp_aph_s[0, 0] - with computation(FORWARD), interval(0, -1): - sig = in_ap[0, 0, 0] * tmp_paphd[0, 0] - if sig > 0.1 and sig < 0.4 and t[0, 0, 0] > t[0, 0, 1]: - tmp_trpaus[0, 0] = sig - - # === 3: physics - # --- main vertical loop - with computation(FORWARD): - with interval(0, NCLDTOP - 1): - # --- initialize variables - out_lude[0, 0, 0] = in_lude[0, 0, 0] - out_pfplsl[0, 0, 0] = 0.0 - out_pfplsi[0, 0, 0] = 0.0 - out_pfplsr[0, 0, 0] = 0.0 - out_pfplss[0, 0, 0] = 0.0 - pfplsv = 0.0 - out_qln[0, 0, 0] = 0.0 - out_qin[0, 0, 0] = 0.0 - out_qrn[0, 0, 0] = 0.0 - out_qsn[0, 0, 0] = 0.0 - qvn = 0.0 - anew = 0.0 - with interval(NCLDTOP - 1, None): - # *** 3.0: initialize variables - # --- first guess microphysics - qlfg = ql - qifg = qi - qrfg = qr - qsfg = qs - qvfg = qv - - convsink_ql = 0.0 - convsink_qi = 0.0 - convsink_qr = 0.0 - convsink_qs = 0.0 - convsrce_ql = 0.0 - convsrce_qi = 0.0 - convsrce_qr = 0.0 - convsrce_qs = 0.0 - convsrce_qv = 0.0 - fallsrce_ql = 0.0 - fallsrce_qi = 0.0 - fallsrce_qr = 0.0 - fallsrce_qs = 0.0 - index1_ql = True - index1_qi = True - index1_qr = True - index1_qs = True - index1_qv = True - index3_ql_ql = False - index3_ql_qi = False - index3_ql_qr = False - index3_ql_qs = False - index3_ql_qv = False - index3_qi_ql = False - index3_qi_qi = False - index3_qi_qr = False - index3_qi_qs = False - index3_qi_qv = False - index3_qr_ql = False - index3_qr_qi = False - index3_qr_qr = False - index3_qr_qs = False - index3_qr_qv = False - index3_qs_ql = False - index3_qs_qi = False - index3_qs_qr = False - index3_qs_qs = False - index3_qs_qv = False - index3_qv_ql = False - index3_qv_qi = False - index3_qv_qr = False - index3_qv_qs = False - index3_qv_qv = False - lcust_ql = 0.0 - lcust_qi = 0.0 - lcust_qr = 0.0 - lcust_qs = 0.0 - lcust_qv = 0.0 - ldefr = 0.0 - lfinalsum = 0.0 - order_ql = -999 - order_qi = -999 - order_qr = -999 - order_qs = -999 - order_qv = -999 - psupsatsrce_ql = 0.0 - psupsatsrce_qi = 0.0 - psupsatsrce_qr = 0.0 - psupsatsrce_qs = 0.0 - qpretot = 0.0 - solab = 0.0 - solac = 0.0 - solqa_ql_ql = 0.0 - solqa_ql_qi = 0.0 - solqa_ql_qr = 0.0 - solqa_ql_qs = 0.0 - solqa_ql_qv = 0.0 - solqa_qi_ql = 0.0 - solqa_qi_qi = 0.0 - solqa_qi_qr = 0.0 - solqa_qi_qs = 0.0 - solqa_qi_qv = 0.0 - solqa_qr_ql = 0.0 - solqa_qr_qi = 0.0 - solqa_qr_qr = 0.0 - solqa_qr_qs = 0.0 - solqa_qr_qv = 0.0 - solqa_qs_ql = 0.0 - solqa_qs_qi = 0.0 - solqa_qs_qr = 0.0 - solqa_qs_qs = 0.0 - solqa_qs_qv = 0.0 - solqa_qv_ql = 0.0 - solqa_qv_qi = 0.0 - solqa_qv_qr = 0.0 - solqa_qv_qs = 0.0 - solqa_qv_qv = 0.0 - solqb_ql_ql = 0.0 - solqb_ql_qi = 0.0 - solqb_ql_qr = 0.0 - solqb_ql_qs = 0.0 - solqb_ql_qv = 0.0 - solqb_qi_ql = 0.0 - solqb_qi_qi = 0.0 - solqb_qi_qr = 0.0 - solqb_qi_qs = 0.0 - solqb_qi_qv = 0.0 - solqb_qr_ql = 0.0 - solqb_qr_qi = 0.0 - solqb_qr_qr = 0.0 - solqb_qr_qs = 0.0 - solqb_qr_qv = 0.0 - solqb_qs_ql = 0.0 - solqb_qs_qi = 0.0 - solqb_qs_qr = 0.0 - solqb_qs_qs = 0.0 - solqb_qs_qv = 0.0 - solqb_qv_ql = 0.0 - solqb_qv_qi = 0.0 - solqb_qv_qr = 0.0 - solqb_qv_qs = 0.0 - solqb_qv_qv = 0.0 - - # derived variables needed - dp = in_aph[0, 0, 1] - in_aph[0, 0, 0] - gdp = RG / dp - rho = in_ap[0, 0, 0] / (RD * t) - dtgdp = dt * gdp - rdtgdp = dp / (RG * dt) - - # --- calculate dqs/dT correction factor - # liquid - facw = R5LES / (t - R4LES) ** 2 - cor = 1 / (1 - RETV * foeeliqt) - dqsliqdt = facw * cor * qsliq - corqsliq = 1 + RALVDCP * dqsliqdt - - # ice - faci = R5IES / (t - R4IES) ** 2 - cor = 1 / (1 - RETV * foeew) - dqsicedt = faci * cor * qsice - corqsice = 1 + RALSDCP * dqsicedt - - # diagnostic mixed - fac = out_foealfa[0, 0, 0] * facw + (1 - out_foealfa[0, 0, 0]) * faci - cor = 1 / (1 - RETV * foeewmt) - dqsmixdt = fac * cor * qsmix - corqsmix = 1 + f_foeldcpm(t) * dqsmixdt - - # evaporation/sublimation limits - evaplimmix = max((qsmix - qv) / corqsmix, 0.0) - evaplimice = max((qsice - qv) / corqsice, 0.0) - - # --- in-cloud condensate amount - tmpa = 1 / max(a, EPSEC) - liqcld = ql * tmpa - icecld = qi * tmpa - licld = liqcld + icecld - - # --- evaporate very small amounts of liquid... - if ql < RLMIN: - solqa_qv_ql += ql - solqa_ql_qv -= ql - - # --- ...and ice - if qi < RLMIN: - solqa_qv_qi += qi - solqa_qi_qv -= qi - - # *** 3.1: ice supersaturation adjustment - # --- supersaturation limit (from Koop) - fokoop = f_fokoop(t) - - if t >= RTT or NSSOPT == 0: - fac = 1.0 - faci = 1.0 - else: - fac = a + fokoop * (1 - a) - faci = dt / RKOOPTAU - - # calculate supersaturation to add to cloud - if a > 1 - RAMIN: - supsat = max((qv - fac * qsice) / corqsice, 0.0) - else: - # calculate environmental humidity supersaturation - qp1env = (qv - a * qsice) / max(1 - a, EPSILON) - supsat = max((1 - a) * (qp1env - fac * qsice) / corqsice, 0.0) - - # --- here the supersaturation is turned into liquid water - if supsat > EPSEC: - if t > RTHOMO: - # turn supersaturation into liquid water - solqa_ql_qv += supsat - solqa_qv_ql -= supsat - # include liquid in first guess - qlfg += supsat - else: - # turn supersaturation into ice water - solqa_qi_qv += supsat - solqa_qv_qi -= supsat - # add ice to first guess for deposition term - qifg += supsat - - # increase cloud amount using RKOOPTAU timescale - solac = (1 - a) * faci - - # --- include supersaturation from previous timestep - if in_supsat[0, 0, 0] > EPSEC: - if t > RTHOMO: - # turn supersaturation into liquid water - solqa_ql_ql += in_supsat[0, 0, 0] - psupsatsrce_ql = in_supsat[0, 0, 0] - # add liquid to first guess for deposition term - qlfg += in_supsat[0, 0, 0] - else: - # turn supersaturation into ice water - solqa_qi_qi += in_supsat[0, 0, 0] - psupsatsrce_qi = in_supsat[0, 0, 0] - # add ice to first guess for deposition term - qifg += in_supsat[0, 0, 0] - - # increase cloud amount using RKOOPTAU timescale - solac = (1 - a) * faci - - # *** 3.2: detrainment from convection - if tmp_klevel[0] < NLEV - 1: - out_lude[0, 0, 0] = in_lude[0, 0, 0] * dtgdp - - if in_convection_on[0, 0] and out_lude[0, 0, 0] > RLMIN and in_lu[0, 0, 1] > EPSEC: - solac += out_lude[0, 0, 0] / in_lu[0, 0, 1] - # diagnostic temperature split - convsrce_ql = out_foealfa[0, 0, 0] * out_lude[0, 0, 0] - convsrce_qi = (1 - out_foealfa[0, 0, 0]) * out_lude[0, 0, 0] - solqa_ql_ql += convsrce_ql - solqa_qi_qi += convsrce_qi - else: - out_lude[0, 0, 0] = 0.0 - - # convective snow detrainment source - if in_convection_on[0, 0]: - solqa_qs_qs += in_snde[0, 0, 0] * dtgdp - else: - out_lude[0, 0, 0] = in_lude[0, 0, 0] - - # *** 3.3: subsidence compensating convective updraughts - # --- subsidence source from layer above and evaporation of cloud within the layer - if tmp_klevel[0] > NCLDTOP - 1: - mf = max(0.0, (in_mfu + in_mfd) * dtgdp) - acust = mf * anew[0, 0, -1] - - if __INLINED(not FALLQL and PHASEQL > 0): - lcust_ql = mf * out_qln[0, 0, -1] - # record total flux for enthalpy budget - convsrce_ql += lcust_ql - - if __INLINED(not FALLQI and PHASEQI > 0): - lcust_qi = mf * out_qin[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qi += lcust_qi - - if __INLINED(not FALLQR and PHASEQR > 0): - lcust_qr = mf * out_qrn[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qr += lcust_qr - - if __INLINED(not FALLQS and PHASEQS > 0): - lcust_qs = mf * out_qsn[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qs += lcust_qs - - if __INLINED(not FALLQV and PHASEQV > 0): - lcust_qv = mf * qvn[0, 0, -1] - # record total flux for enthalpy budget - convsrce_qv += lcust_qv - - # work out how much liquid evaporates at arrival point - dtdp = RDCP * 0.5 * (t[0, 0, -1] + t[0, 0, 0]) / in_aph[0, 0, 0] - dtforc = dtdp[0, 0, 0] * (in_ap[0, 0, 0] - in_ap[0, 0, -1]) - dqs = anew[0, 0, -1] * dtforc * dqsmixdt - - if __INLINED(not FALLQL and PHASEQL > 0): - lfinal = max(0.0, lcust_ql - dqs) - evap = min(lcust_ql - lfinal, evaplimmix) - lfinal = lcust_ql - evap - lfinalsum += lfinal - solqa_ql_ql += lcust_ql - solqa_qv_ql += evap - solqa_ql_qv -= evap - - if __INLINED(not FALLQI and PHASEQI > 0): - lfinal = max(0.0, lcust_qi - dqs) - evap = min(lcust_qi - lfinal, evaplimmix) - lfinal = lcust_qi - evap - lfinalsum += lfinal - solqa_qi_qi += lcust_qi - solqa_qv_qi += evap - solqa_qi_qv -= evap - - if __INLINED(not FALLQR and PHASEQR > 0): - lfinal = max(0.0, lcust_qr - dqs) - evap = min(lcust_qr - lfinal, evaplimmix) - lfinal = lcust_qr - evap - lfinalsum += lfinal - solqa_qr_qr += lcust_qr - solqa_qv_qr += evap - solqa_qr_qv -= evap - - if __INLINED(not FALLQS and PHASEQS > 0): - lfinal = max(0.0, lcust_qs - dqs) - evap = min(lcust_qs - lfinal, evaplimmix) - lfinal = lcust_qs - evap - lfinalsum += lfinal - solqa_qs_qs += lcust_qs - solqa_qv_qs += evap - solqa_qs_qv -= evap - - if __INLINED(not FALLQV and PHASEQV > 0): - lfinal = max(0.0, lcust_qv - dqs) - evap = min(lcust_qv - lfinal, evaplimmix) - lfinal = lcust_qv - evap - lfinalsum += lfinal - solqa_qv_qv += lcust_qv - - # reset the cloud contribution if no cloud water survives to this level - if lfinalsum < EPSEC: - acust = 0.0 - solac += acust - - # --- subsidence sink of cloud to the layer below - if tmp_klevel[0] < NLEV - 1: - mfdn = max(0.0, (in_mfu[0, 0, 1] + in_mfd[0, 0, 1]) * dtgdp) - solab += mfdn - solqb_ql_ql += mfdn - solqb_qi_qi += mfdn - - # record sink for cloud budget and enthalpy budget diagnostics - convsink_ql = mfdn - convsink_qi = mfdn - - # *** 3.4: erosion of clouds by turbulent mixing - # --- define turbulent erosion rate - ldifdt = RCLDIFF * dt - if in_convection_type[0, 0] > 0 and out_lude[0, 0, 0] > EPSEC: - ldifdt *= RCLDIFF_CONVI - - if li > EPSEC: - # calculate environmental humidity - e = ldifdt * max(qsmix - qv, 0.0) - leros = min(min(a * e, evaplimmix), li) - aeros = leros / licld - - # erosion is -ve linear in L, A - solac -= aeros - solqa_qv_ql += liqfrac * leros - solqa_ql_qv -= liqfrac * leros - solqa_qv_qi += icefrac * leros - solqa_qi_qv -= icefrac * leros - - # *** 3.5: condensation/evaporation due to dqsat/dT - dtdp = RDCP * t / in_ap[0, 0, 0] - dpmxdt = dp / dt - mfdn = in_mfu[0, 0, 1] + in_mfd[0, 0, 1] if tmp_klevel[0] < NLEV - 1 else 0.0 - wtot = in_w[0, 0, 0] + 0.5 * RG * (in_mfu[0, 0, 0] + in_mfd[0, 0, 0] + mfdn) - wtot = min(dpmxdt, max(-dpmxdt, wtot)) - zzdt = in_hrsw[0, 0, 0] + in_hrlw[0, 0, 0] - dtdiab = min(dpmxdt * dtdp, max(-dpmxdt * dtdp, zzdt)) * dt + RALFDCP * ldefr - dtforc = dtdp * wtot * dt + dtdiab - qold = qsmix - told = t - t = max(t + dtforc, 160.0) - - qsmix, t = f_cuadjtq(in_ap, qsmix, t) - - dqs = qsmix - qold - qsmix = qold - t = told - - # ***: 3.5a: evaporation of clouds - if dqs > 0: - levap = min(min(a * min(dqs, licld), evaplimmix), max(qsmix - qv, 0.0)) - solqa_qv_ql += liqfrac * levap - solqa_ql_qv -= liqfrac * levap - solqa_qv_qi += icefrac * levap - solqa_qi_qv -= icefrac * levap - - # *** 3.5b: formation of clouds - # increase of cloud water in existing clouds - if a > EPSEC and dqs <= -RLMIN: - lcond1 = max(-dqs, 0.0) - - # old limiter - if a > 0.99: - cor = 1 / (1 - RETV * qsmix) - cdmax = (qv - qsmix) / (1 + cor * qsmix * f_foedem(t)) - else: - cdmax = (qv - a * qsmix) / a - - lcond1 = a * max(min(lcond1, cdmax), 0.0) - if lcond1 < RLMIN: - lcond1 = 0.0 - - # --- all increase goes into liquid unless so cold cloud homogeneously freezes - if t > RTHOMO: - solqa_ql_qv += lcond1 - solqa_qv_ql -= lcond1 - qlfg += lcond1 - else: - solqa_qi_qv += lcond1 - solqa_qv_qi -= lcond1 - qifg += lcond1 - - # generation of new clouds (da/dt > 0) - if dqs <= -RLMIN and a < 1 - EPSEC: - # --- critical relative humidity - rhc = RAMID - sigk = in_ap[0, 0, 0] / tmp_aph_s[0, 0] - if sigk > 0.8: - rhc += (1 - RAMID) * ((sigk - 0.8) / 0.2) ** 2 - - # --- supersaturation options - if __INLINED(NSSOPT == 0): - # no scheme - qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a)) - elif __INLINED(NSSOPT == 1): - # Tompkins - qe = max(0.0, (qv - a * qsice) / max(EPSEC, 1 - a)) - elif __INLINED(NSSOPT == 2): - # Lohmann and Karcher - qe = qv - else: - # Gierens - qe = qv + li - - if t >= RTT or NSSOPT == 0: - # no ice supersaturation allowed - fac = 1.0 - else: - # ice supersaturation - fac = fokoop - - if qe >= rhc * qsice * fac and qe < qsice * fac: - acond = -(1 - a) * fac * dqs / max(2 * (fac * qsice - qe), EPSEC) - acond = min(acond, 1 - a) - - # linear term - lcond2 = -fac * dqs * 0.5 * acond - - # new limiter formulation - zdl = 2 * (fac * qsice - qe) / max(EPSEC, 1 - a) - expr2 = fac * dqs - if expr2 < -zdl: - lcondlim = (a - 1) * expr2 - fac * qsice + qv - lcond2 = min(lcond2, lcondlim) - lcond2 = max(lcond2, 0.0) - - expr10 = 1 - a - if lcond2 < RLMIN or expr10 < EPSEC: - lcond2 = 0.0 - acond = 0.0 - if lcond2 == 0.0: - acond = 0.0 - - # large-scale generation is linear in A and linear in L - solac += acond - - # --- all increase goes into liquid unless so cold cloud homogeneously freezes - if t > RTHOMO: - solqa_ql_qv += lcond2 - solqa_qv_ql -= lcond2 - qlfg += lcond2 - else: # homogeneous freezing - solqa_qi_qv += lcond2 - solqa_qv_qi -= lcond2 - qifg += lcond2 - - # *** 3.6: growth of ice by vapour deposition - if __INLINED(DEPICE == 1): # --- ice deposition following Rotstayn et al. (2001) - # --- calculate distance from cloud top - if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF: - tmp_cldtopdist[0, 0] = 0.0 - else: - tmp_cldtopdist[0, 0] += dp / (rho * RG) - - # --- only treat depositional growth if liquid present - if t < RTT and qlfg > RLMIN: - vpice = f_foeeice(t) * RV / RD - vpliq = vpice * fokoop - icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639) - - # --- 0.024 is conductivity of air - # --- 8.8 = 700 ** (1/3) = density of ice to the third - add = RLSTT * (RLSTT / (RV * t) - 1) / (0.024 * t) - bdd = RV * t * in_ap[0, 0, 0] / (2.21 * vpice) - cvds = ( - 7.8 - * (icenuclei / rho) ** 0.666 - * (vpliq - vpice) - / (8.87 * (add + bdd) * vpice) - ) - - # --- RICEINIT = 1e-12 is initial mass of ice particle - ice0 = max(icecld, icenuclei * RICEINIT / rho) - - # --- new value of ice - inew = (0.666 * cvds * dt + ice0**0.666) ** 1.5 - - # --- grid-mean deposition rate - depos = max(a * (inew - ice0), 0.0) - - # --- limit deposition to liquid water amount - depos = min(depos, qlfg) - - # --- at top of cloud, reduce deposition rate near cloud top - infactor = min(icenuclei / 15000, 1.0) - depos *= min( - infactor - + (1 - infactor) - * (RDEPLIQREFRATE + tmp_cldtopdist[0, 0] / RDEPLIQREFDEPTH), - 1.0, - ) - - # --- add to matrix - solqa_qi_ql += depos - solqa_ql_qi -= depos - qifg += depos - qlfg -= depos - elif __INLINED(DEPICE == 2): # --- ice deposition assuming ice PSD - # --- calculate distance from cloud top - if a[0, 0, -1] < RCLDTOPCF and a[0, 0, 0] >= RCLDTOPCF: - tmp_cldtopdist = 0.0 - else: - tmp_cldtopdist += dp / (rho * RG) - - # --- only treat depositional growth if liquid present - if t < RTT and qlfg > RLMIN: - vpice = f_foeeice(t) * RV / RD - vpliq = vpice * fokoop - icenuclei = 1000 * exp(12.96 * (vpliq - vpice) / vpliq - 0.639) - - # --- RICEINIT=1e-12 is the initial mass of ice particle - ice0 = max(icecld, icenuclei * RICEINIT / rho) - - # particle size distribution - tcg = 1 - facx1i = 1 - apb = RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap * RCL_APB3 * t**3 - corrfac = (1 / rho) ** 0.5 - corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120) - pr02 = rho * ice0 * RCL_CONST1I / (tcg * facx1i) - term1 = ( - (vpliq - vpice) - * t**2 - * vpice - * corrfac2 - * tcg - * RCL_CONST2I - * facx1i - / (rho * apb * vpice) - ) - term2 = ( - 0.65 * RCL_CONST6I * pr02**RCL_CONST4I - + RCL_CONST3I - * corrfac**0.5 - * rho**0.5 - * pr02**RCL_CONST5I - / corrfac2**0.5 - ) - depos = max(a * term1 * term2 * dt, 0.0) - - # --- limit deposition to liquid water amount - depos = min(depos, qlfg) - - # --- at top of cloud, reduce deposition rate near cloud top to account for - # --- small scale turbulent processes - infactor = min(icenuclei / 15000, 1.0) - depos *= min( - infactor - + (1 - infactor) * (RDEPLIQREFRATE + tmp_cldtopdist / RDEPLIQREFDEPTH), - 1.0, - ) - - # --- add to matrix - solqa_qi_ql += depos - solqa_ql_qi -= depos - qifg += depos - qlfg -= depos - - # === 4: precipitation processes - # --- revise in-cloud condensate amount - tmpa = 1 / max(a, EPSEC) - liqcld = qlfg * tmpa - icecld = qifg * tmpa - - # *** 4.1a: sedimentation/falling of ql - if __INLINED(FALLQL): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_ql = out_pfplsl[0, 0, -1] * dtgdp - solqa_ql_ql += fallsrce_ql - qlfg += fallsrce_ql - # use first guess precip - qpretot += qlfg - - # --- sink to next layer, constant fall speed - fallsink_ql = dtgdp * VQL * rho - else: - fallsink_ql = 0.0 - - # *** 4.1b: sedimentation/falling of qi - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qi = out_pfplsi[0, 0, -1] * dtgdp - solqa_qi_qi += fallsrce_qi - qifg += fallsrce_qi - # use first guess precip - qpretot += qifg - - # --- sink to next layer, constant fall speed - if __INLINED(LAERICESED): - vqi = 0.002 * in_re_ice[0, 0, 0] - else: - vqi = VQI - fallsink_qi = dtgdp * vqi * rho - - # *** 4.1c: sedimentation/falling of qr - if __INLINED(FALLQR): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qr = out_pfplsr[0, 0, -1] * dtgdp - solqa_qr_qr += fallsrce_qr - qrfg += fallsrce_qr - # use first guess precip - qpretot += qrfg - - # --- sink to next layer, constant fall speed - fallsink_qr = dtgdp * VQR * rho - else: - fallsink_qr = 0.0 - - # *** 4.1d: sedimentation/falling of qs - if __INLINED(FALLQS): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qs = out_pfplss[0, 0, -1] * dtgdp - solqa_qs_qs += fallsrce_qs - qsfg += fallsrce_qs - # use first guess precip - qpretot += qsfg - - # --- sink to next layer, constant fall speed - fallsink_qs = dtgdp * VQS * rho - else: - fallsink_qs = 0.0 - - # *** 4.1e: sedimentation/falling of qv - if __INLINED(FALLQV): - # --- source from layer above - if tmp_klevel[0] > NCLDTOP - 1: - fallsrce_qv = pfplsv[0, 0, -1] * dtgdp - solqa_qv_qv += fallsrce_qv - qvfg += fallsrce_qv - # use first guess precip - qpretot += qvfg - - # --- sink to next layer, constant fall speed - fallsink_qv = dtgdp * VQV * rho - else: - fallsink_qv = 0.0 - - # --- precip cover overlap using RAX-RAN Overlap - if qpretot > EPSEC: - tmp_covptot[0, 0] = 1 - ( - (1 - tmp_covptot[0, 0]) - * (1 - max(a[0, 0, 0], a[0, 0, -1])) - / (1 - min(a[0, 0, -1], 1 - 1e-6)) - ) - tmp_covptot[0, 0] = max(tmp_covptot[0, 0], RCOVPMIN) - covpclr = max(0.0, tmp_covptot[0, 0] - a) - raincld = qrfg / tmp_covptot[0, 0] - snowcld = qsfg / tmp_covptot[0, 0] - tmp_covpmax[0, 0] = max(tmp_covptot[0, 0], tmp_covpmax[0, 0]) - else: - raincld = 0.0 - snowcld = 0.0 - tmp_covptot[0, 0] = 0.0 - covpclr = 0.0 - tmp_covpmax[0, 0] = 0.0 - - # *** 4.2a: autoconversion to snow - if t <= RTT: - # --- snow autoconversion rate follow Lin et al. 1983 - if icecld > EPSEC: - co = dt * RSNOWLIN1 * exp(RSNOWLIN2 * (t - RTT)) - - if __INLINED(LAERICEAUTO): - lcrit = in_icrit_aer[0, 0, 0] - co *= (RNICE / in_nice[0, 0, 0]) ** 0.333 - else: - lcrit = RLCRITSNOW - - snowaut = co * (1 - exp(-((icecld / lcrit) ** 2))) - solqb_qs_qi += snowaut - - # *** 4.2b: autoconversion warm clouds - if liqcld > EPSEC: - if __INLINED(WARMRAIN == 1): # --- warm-rain process follow Sundqvist (1989) - co = RKCONV * dt - - if __INLINED(LAERLIQAUTOLSP): - lcrit = in_lcrit_aer[0, 0, 0] - co *= (RCCN / in_ccn[0, 0, 0]) ** 0.333 - else: - lcrit = RCLCRIT_LAND if in_lsm[0, 0] > 0.5 else RCLCRIT_SEA - - # --- parameters for cloud collection by rain and snow - precip = (out_pfplss[0, 0, -1] + out_pfplsr[0, 0, -1]) / max( - EPSEC, tmp_covptot[0, 0] - ) - cfpr = 1 + RPRC1 * sqrt(max(precip, 0.0)) - if __INLINED(LAERLIQCOLL): - cfpr *= (RCCN / in_ccn[0, 0, 0]) ** 0.333 - - co *= cfpr - lcrit /= max(cfpr, EPSEC) - - rainaut = co - if liqcld / lcrit < 20: - rainaut *= 1 - exp(-((liqcld / lcrit) ** 2)) - - # rain freezes instantly - if t <= RTT: - solqb_qs_ql += rainaut - else: - solqb_qr_ql += rainaut - elif __INLINED( - WARMRAIN == 2 - ): # --- warm-rain process follow Khairoutdinov and Kogan (2000) - if in_lsm[0, 0] > 0.5: - const = RCL_KK_cloud_num_land - lcrit = RCLCRIT_LAND - else: - const = RCL_KK_cloud_num_sea - lcrit = RCLCRIT_SEA - - if liqcld > lcrit: - rainaut = ( - 1.5 * a * dt * RCL_KKAau * liqcld**RCL_KKBauq * const**RCL_KKBaun - ) - rainaut = min(rainaut, qlfg) - if rainaut < EPSEC: - rainaut = 0.0 - rainacc = 2 * a * dt * RCL_KKAac * (liqcld * raincld) ** RCL_KKBac - rainacc = min(rainacc, qlfg) - if rainacc < EPSEC: - rainacc = 0.0 - else: - rainaut = 0.0 - rainacc = 0.0 - - expr3 = rainaut + rainacc - if t <= RTT: - solqa_qs_ql += expr3 - solqa_ql_qs -= expr3 - else: - solqa_qr_ql += expr3 - solqa_ql_qr -= expr3 - - # --- riming - collection of cloud liquid drops by snow and ice - if __INLINED(WARMRAIN > 1): - if t <= RTT and liqcld > EPSEC: - # fallspeed air density correction - fallcorr = (RDENSREF / rho) ** 0.4 - - # --- riming of snow by cloud water - implicit in lwc - if snowcld > EPSEC and tmp_covptot[0, 0] > 0.01: - # calculate riming term - snowrime = ( - 0.3 - * tmp_covptot[0, 0] - * dt - * RCL_CONST7S - * fallcorr - * (rho * snowcld * RCL_CONST1S) ** RCL_CONST8S - ) - - # limit snow riming term - snowrime = min(snowrime, 1.0) - - solqb_qs_ql += snowrime - - # *** 4.3a: melting of snow and ice - icetot = qifg + qsfg - meltmax = 0.0 - - # if there are frozen hydrometeors present and dry-bulb temperature > 0degC - if icetot > EPSEC and t > RTT: - # calculate subsaturation - subsat = max(qsice - qv, 0.0) - - # calculate difference between dry-bulb and the temperature at which the wet-buld=0degC - # using and approx - tdmtw0 = t - RTT - subsat * (TW1 + TW2 * (in_ap[0, 0, 0] - TW3) - TW4 * (t - TW5)) - - # ensure cons1 is positive - cons1 = abs(dt * (1 + 0.5 * tdmtw0) / RTAUMEL) - meltmax = max(tdmtw0 * cons1 * RLDCP, 0.0) - - if meltmax > EPSEC and icetot > EPSEC: - # apply melting in same proportion as frozen hydrometeor fractions - alfa_qi = qifg / icetot - melt_qi = min(qifg, alfa_qi * meltmax) - alfa_qs = qsfg / icetot - melt_qs = min(qsfg, alfa_qs * meltmax) - - # needed in first guess - qifg -= melt_qi - qrfg += melt_qi + melt_qs - qsfg -= melt_qs - solqa_qi_qr -= melt_qi - solqa_qr_qi += melt_qi - solqa_qr_qs += melt_qs - solqa_qs_qr -= melt_qs - - # *** 4.3b: freezing of rain - if qr > EPSEC: - if t[0, 0, 0] <= RTT and t[0, 0, -1] > RTT: - # base of melting layer/top of refreezing layer so store rain/snow fraction for - # precip type diagnosis - qpretot = max(qs + qr, EPSEC) - out_rainfrac_toprfz[0, 0] = qr / qpretot - tmp_rainliq[0, 0] = out_rainfrac_toprfz[0, 0] > 0.8 - - if t < RTT: - if tmp_rainliq[0, 0]: - # majority of raindrops completely melted - # slope of rain partical size distribution - lambda_ = (RCL_FAC1 / (rho * qr)) ** RCL_FAC2 - - # calculate freezing rate based on Bigg (1953) and Wisner (1972) - temp = RCL_FZRAB * (t - RTT) - frz = dt * (RCL_CONST5R / rho) * (exp(temp) - 1) * lambda_**RCL_CONST6R - frzmax = max(frz, 0.0) - else: - # majority of raindrops only partially melted - cons1 = abs(dt * (1 + 0.5 * (RTT - t)) / RTAUMEL) - frzmax = max((RTT - t) * cons1 * RLDCP, 0.0) - - if frzmax > EPSEC: - frz = min(qr, frzmax) - solqa_qs_qr += frz - solqa_qr_qs -= frz - - # *** 4.3c: freezing of liquid - frzmax = max((RTHOMO - t) * RLDCP, 0.0) - if frzmax > EPSEC and qlfg > EPSEC: - frz = min(qlfg, frzmax) - solqa_qi_ql += frz - solqa_ql_qi -= frz - - # *** 4.4: evaporation of rain/snow - if __INLINED(EVAPRAIN == 1): # --- rain evaporation scheme from Sundquist - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - qe = (qv - a * qsliq) / max(EPSEC, 1 - a) - - # --- humidity in moistest covpclr part of domain - qe = max(0.0, min(qe, qsliq)) - lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq - if lo1: - # note: preclr is a rain flux - expr4 = tmp_covptot[0, 0] * dtgdp - expr5 = max(abs(expr4), EPSILON) - expr6 = expr5 if expr4 > 0 else -expr5 - preclr = qrfg * covpclr / expr6 - - # --- actual microphysics formula in beta - beta1 = ( - sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0]) - / RVRFACTOR - * preclr - / max(covpclr, EPSEC) - ) - beta = RG * RPECONS * 0.5 * beta1**0.5777 - denom = 1 + beta * dt * corqsliq - dpr = covpclr * beta * (qsliq - qe) / denom * dp / RG - dpevap = dpr * dtgdp - - # --- add evaporation term to explicit sink - evap = min(dpevap, qrfg) - solqa_qv_qr += evap - solqa_qr_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, - tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg), - ) - - # update fg field - qrfg -= evap - elif __INLINED( - EVAPRAIN == 2 - ): # --- rain evaporation scheme based on Abel and Boutle (2013) - # --- calculate relative humidity limit for rain evaporation - # limit rh for rain evaporation dependent on precipitation fraction - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - - # further limit rh for rain evaporation to 80% - rh = min(0.8, rh) - - qe = max(0.0, min(qv, qsliq)) - lo1 = covpclr > EPSEC and qrfg > EPSEC and qe < rh * qsliq - if lo1: - # --- Abel and Boutle (2012) evaporation - # calculate local precipitation (kg/kg) - preclr = qrfg / tmp_covptot[0, 0] - - # fallspeed air density correction - fallcorr = (RDENSREF / rho) ** 0.4 - - # saturation vapor pressure with respect to liquid phase - esatliq = RV / RD * f_foeeliq(t) - - # slope of particle size distribution - lambda_ = (RCL_FAC1 / (rho * preclr)) ** RCL_FAC2 - - evap_denom = ( - RCL_CDENOM1 * esatliq - - RCL_CDENOM2 * t * esatliq - + RCL_CDENOM3 * t**3 * in_ap[0, 0, 0] - ) - - # temperature dependent conductivity - corr2 = (t / 273) ** 1.5 * 393 / (t + 120) - - subsat = max(rh * qsliq - qe, 0.0) - beta = ( - 0.5 - / qsliq - * t**2 - * esatliq - * RCL_CONST1R - * (corr2 / evap_denom) - * ( - 0.78 / lambda_**RCL_CONST4R - + RCL_CONST2R - * (rho * fallcorr) ** 0.5 - / (corr2**0.5 * lambda_**RCL_CONST3R) - ) - ) - denom = 1 + beta * dt - dpevap = covpclr * beta * dt * subsat / denom - - # --- add evaporation term to explicit sink - evap = min(dpevap, qrfg) - solqa_qv_qr += evap - solqa_qr_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, - tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qrfg), - ) - - # update fg field - qrfg -= evap - - # *** 4.5: evaporation of snow - if __INLINED(EVAPSNOW == 1): - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - qe = (qv - a * qsice) / max(EPSEC, 1 - a) - - # --- humidity in moistest covpclr part of domain - qe = max(0.0, min(qe, qsice)) - lo1 = covpclr > EPSEC and qsfg > EPSEC and qe < rh * qsice - if lo1: - expr7 = tmp_covptot[0, 0] * dtgdp - expr8 = max(abs(expr7), EPSILON) - expr9 = expr8 if expr7 > 0 else -expr8 - preclr = qsfg * covpclr / expr9 - - # --- actual microphysics formula in beta - beta1 = ( - sqrt(in_ap[0, 0, 0] / tmp_aph_s[0, 0]) - / RVRFACTOR - * preclr - / max(covpclr, EPSEC) - ) - beta = RG * RPECONS * beta1**0.5777 - denom = 1 + beta * dt * corqsice - dpr = covpclr * beta * (qsice - qe) / denom * dp / RG - dpevap = dpr * dtgdp - - # --- add evaporation term to explicit sink - evap = min(dpevap, qsfg) - solqa_qv_qs += evap - solqa_qs_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, - tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qsfg), - ) - - # update first guess field - qsfg -= evap - elif __INLINED(EVAPSNOW == 2): - # --- calculate relative humidity limit for snow evaporation - rh = RPRECRHMAX + (1 - RPRECRHMAX) * tmp_covpmax[0, 0] / max(EPSEC, 1 - a) - rh = min(max(rh, RPRECRHMAX), 1.0) - qe = (qv - a * qsice) / max(EPSEC, 1 - a) - - # --- humidity in moistest covpclr part of domain - qe = max(0.0, min(qe, qsice)) - lo1 = covpclr > EPSEC and qs > EPSEC and qe < rh * qsice - if lo1: - # calculate local precipitation (kg/kg) - preclr = qsfg / tmp_covptot[0, 0] - vpice = f_foeeice(t) * RV / RD - - # particle size distribution - tcg = 1.0 - facx1s = 1.0 - apb = ( - RCL_APB1 * vpice - RCL_APB2 * vpice * t + in_ap[0, 0, 0] * RCL_APB3 * t**3 - ) - corrfac = (1 / rho) ** 0.5 - corrfac2 = ((t / 273) ** 1.5) * 393 / (t + 120) - pr02 = rho * preclr * RCL_CONST1S / (tcg * facx1s) - term1 = ( - (qsice - qe) - * t**2 - * vpice - * corrfac2 - * tcg - * RCL_CONST2S - * facx1s - / (rho * apb * qsice) - ) - term2 = ( - 0.65 * RCL_CONST6S * pr02**RCL_CONST4S - + RCL_CONST3S - * corrfac**0.5 - * rho**0.5 - * pr02**RCL_CONST5S - / corrfac2**0.5 - ) - dpevap = max(covpclr * term1 * term2 * dt, 0.0) - - # --- limit evaporation to snow amount - evap = min(min(dpevap, evaplimice), qs) - solqa_qv_qs += evap - solqa_qs_qv -= evap - - # --- reduce the total precip coverage proportional to evaporation - tmp_covptot[0, 0] = max( - RCOVPMIN, tmp_covptot[0, 0] - max(0.0, (tmp_covptot[0, 0] - a) * evap / qs) - ) - - # update first guess field - qsfg -= evap - - # --- evaporate small precipitation amounts - if __INLINED(FALLQL): - if qlfg < RLMIN: - solqa_qv_ql += qlfg - solqa_ql_qv -= qlfg - if __INLINED(FALLQI): - if qifg < RLMIN: - solqa_qv_qi += qifg - solqa_qi_qv -= qifg - if __INLINED(FALLQR): - if qrfg < RLMIN: - solqa_qv_qr += qrfg - solqa_qr_qv -= qrfg - if __INLINED(FALLQS): - if qsfg < RLMIN: - solqa_qv_qs += qsfg - solqa_qs_qv -= qsfg - - # === 5: solvers for A and L - # *** 5.1: solver for cloud cover - anew = min((a + solac) / (1 + solab), 1.0) - if anew < RAMIN: - anew = 0.0 - da = anew - a0 - - # *** 5.2: solver for the microphysics - # --- collect sink terms and mark - sinksum_ql = -(solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv) - sinksum_qi = -(solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv) - sinksum_qr = -(solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv) - sinksum_qs = -(solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv) - sinksum_qv = -(solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv) - - # --- calculate overshoot and scaling factor - max_ql = max(ql, EPSEC) - rat_ql = max(sinksum_ql, max_ql) - ratio_ql = max_ql / rat_ql - max_qi = max(qi, EPSEC) - rat_qi = max(sinksum_qi, max_qi) - ratio_qi = max_qi / rat_qi - max_qr = max(qr, EPSEC) - rat_qr = max(sinksum_qr, max_qr) - ratio_qr = max_qr / rat_qr - max_qs = max(qs, EPSEC) - rat_qs = max(sinksum_qs, max_qs) - ratio_qs = max_qs / rat_qs - max_qv = max(qv, EPSEC) - rat_qv = max(sinksum_qv, max_qv) - ratio_qv = max_qv / rat_qv - - # --- now sort ratio to find out which species run out first - order_ql, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_ql, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qi, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qi, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qr, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qr, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qs, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qs, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - order_qv, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv = f_helper_0( - order_qv, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - ) - - # scale the sink terms, in the correct order, recalculating the scale factor each time - sinksum_ql = 0.0 - sinksum_qi = 0.0 - sinksum_qr = 0.0 - sinksum_qs = 0.0 - sinksum_qv = 0.0 - - # --- recalculate sum and scaling factor, and then scale - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_ql, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qi, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qr, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qs, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv = f_helper_1( - order_qv, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, - ) - - # *** 5.2.2: solver - # --- set the lhs of equation - # --- diagonals: microphysical sink terms + transport - lhs_ql_ql = ( - 1 - + fallsink_ql - + solqb_qv_ql - + solqb_ql_ql - + solqb_qi_ql - + solqb_qr_ql - + solqb_qs_ql - ) - lhs_qi_qi = ( - 1 - + fallsink_qi - + solqb_qv_qi - + solqb_ql_qi - + solqb_qi_qi - + solqb_qr_qi - + solqb_qs_qi - ) - lhs_qr_qr = ( - 1 - + fallsink_qr - + solqb_qv_qr - + solqb_ql_qr - + solqb_qi_qr - + solqb_qr_qr - + solqb_qs_qr - ) - lhs_qs_qs = ( - 1 - + fallsink_qs - + solqb_qv_qs - + solqb_ql_qs - + solqb_qi_qs - + solqb_qr_qs - + solqb_qs_qs - ) - lhs_qv_qv = ( - 1 - + fallsink_qv - + solqb_qv_qv - + solqb_ql_qv - + solqb_qi_qv - + solqb_qr_qv - + solqb_qs_qv - ) - - # --- non-diagonals: microphysical source terms - lhs_ql_qi = -solqb_ql_qi - lhs_ql_qr = -solqb_ql_qr - lhs_ql_qs = -solqb_ql_qs - lhs_ql_qv = -solqb_ql_qv - lhs_qi_ql = -solqb_qi_ql - lhs_qi_qr = -solqb_qi_qr - lhs_qi_qs = -solqb_qi_qs - lhs_qi_qv = -solqb_qi_qv - lhs_qr_ql = -solqb_qr_ql - lhs_qr_qi = -solqb_qr_qi - lhs_qr_qs = -solqb_qr_qs - lhs_qr_qv = -solqb_qr_qv - lhs_qs_ql = -solqb_qs_ql - lhs_qs_qi = -solqb_qs_qi - lhs_qs_qr = -solqb_qs_qr - lhs_qs_qv = -solqb_qs_qv - lhs_qv_ql = -solqb_qv_ql - lhs_qv_qi = -solqb_qv_qi - lhs_qv_qr = -solqb_qv_qr - lhs_qv_qs = -solqb_qv_qs - - # --- set the rhs of equation - # --- sum the explicit source and sink - out_qln[0, 0, 0] = ( - ql + solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv - ) - out_qin[0, 0, 0] = ( - qi + solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv - ) - out_qrn[0, 0, 0] = ( - qr + solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv - ) - out_qsn[0, 0, 0] = ( - qs + solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv - ) - qvn = qv + solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv - - # --- solve by LU decomposition - # non pivoting recursive factorization - lhs_qi_ql /= lhs_ql_ql # JN=1, JM=2 - lhs_qi_qi -= lhs_qi_ql * lhs_ql_qi # JN=1, JM=2, IK=2 - lhs_qi_qr -= lhs_qi_ql * lhs_ql_qr # JN=1, JM=2, IK=3 - lhs_qi_qs -= lhs_qi_ql * lhs_ql_qs # JN=1, JM=2, IK=4 - lhs_qi_qv -= lhs_qi_ql * lhs_ql_qv # JN=1, JM=2, IK=0 - lhs_qr_ql /= lhs_ql_ql # JN=1, JM=3 - lhs_qr_qi -= lhs_qr_ql * lhs_ql_qi # JN=1, JM=3, IK=2 - lhs_qr_qr -= lhs_qr_ql * lhs_ql_qr # JN=1, JM=3, IK=3 - lhs_qr_qs -= lhs_qr_ql * lhs_ql_qs # JN=1, JM=3, IK=4 - lhs_qr_qv -= lhs_qr_ql * lhs_ql_qv # JN=1, JM=3, IK=0 - lhs_qs_ql /= lhs_ql_ql # JN=1, JM=4 - lhs_qs_qi -= lhs_qs_ql * lhs_ql_qi # JN=1, JM=4, IK=2 - lhs_qs_qr -= lhs_qs_ql * lhs_ql_qr # JN=1, JM=4, IK=3 - lhs_qs_qs -= lhs_qs_ql * lhs_ql_qs # JN=1, JM=4, IK=4 - lhs_qs_qv -= lhs_qs_ql * lhs_ql_qv # JN=1, JM=4, IK=0 - lhs_qv_ql /= lhs_ql_ql # JN=1, JM=0 - lhs_qv_qi -= lhs_qv_ql * lhs_ql_qi # JN=1, JM=0, IK=2 - lhs_qv_qr -= lhs_qv_ql * lhs_ql_qr # JN=1, JM=0, IK=3 - lhs_qv_qs -= lhs_qv_ql * lhs_ql_qs # JN=1, JM=0, IK=4 - lhs_qv_qv -= lhs_qv_ql * lhs_ql_qv # JN=1, JM=0, IK=0 - lhs_qr_qi /= lhs_qi_qi # JN=2, JM=3 - lhs_qr_qr -= lhs_qr_qi * lhs_qi_qr # JN=2, JM=3, IK=3 - lhs_qr_qs -= lhs_qr_qi * lhs_qi_qs # JN=2, JM=3, IK=4 - lhs_qr_qv -= lhs_qr_qi * lhs_qi_qv # JN=2, JM=3, IK=0 - lhs_qs_qi /= lhs_qi_qi # JN=2, JM=4 - lhs_qs_qr -= lhs_qs_qi * lhs_qi_qr # JN=2, JM=4, IK=3 - lhs_qs_qs -= lhs_qs_qi * lhs_qi_qs # JN=2, JM=4, IK=4 - lhs_qs_qv -= lhs_qs_qi * lhs_qi_qv # JN=2, JM=4, IK=0 - lhs_qv_qi /= lhs_qi_qi # JN=2, JM=0 - lhs_qv_qr -= lhs_qv_qi * lhs_qi_qr # JN=2, JM=0, IK=3 - lhs_qv_qs -= lhs_qv_qi * lhs_qi_qs # JN=2, JM=0, IK=4 - lhs_qv_qv -= lhs_qv_qi * lhs_qi_qv # JN=2, JM=0, IK=0 - lhs_qs_qr /= lhs_qr_qr # JN=3, JM=4 - lhs_qs_qs -= lhs_qs_qr * lhs_qr_qs # JN=3, JM=4, IK=4 - lhs_qs_qv -= lhs_qs_qr * lhs_qr_qv # JN=3, JM=4, IK=0 - lhs_qv_qr /= lhs_qr_qr # JN=3, JM=0 - lhs_qv_qs -= lhs_qv_qr * lhs_qr_qs # JN=3, JM=0, IK=4 - lhs_qv_qv -= lhs_qv_qr * lhs_qr_qv # JN=3, JM=0, IK=0 - lhs_qv_qs /= lhs_qs_qs # JN=4, JM=0 - lhs_qv_qv -= lhs_qv_qs * lhs_qs_qv # JN=4, JM=0, IK=0 - - # backsubstitution: step 1 - out_qin[0, 0, 0] -= lhs_qi_ql * out_qln[0, 0, 0] - out_qrn[0, 0, 0] -= lhs_qr_ql * out_qln[0, 0, 0] + lhs_qr_qi * out_qin[0, 0, 0] - out_qsn[0, 0, 0] -= ( - lhs_qs_ql * out_qln[0, 0, 0] - + lhs_qs_qi * out_qin[0, 0, 0] - + lhs_qs_qr * out_qrn[0, 0, 0] - ) - qvn -= ( - lhs_qv_ql * out_qln[0, 0, 0] - + lhs_qv_qi * out_qin[0, 0, 0] - + lhs_qv_qr * out_qrn[0, 0, 0] - + lhs_qv_qs * out_qsn[0, 0, 0] - ) - - # backsubstitution: step 2 - qvn /= lhs_qv_qv - out_qsn[0, 0, 0] -= lhs_qs_qv * qvn - out_qsn[0, 0, 0] /= lhs_qs_qs - out_qrn[0, 0, 0] -= lhs_qr_qs * out_qsn[0, 0, 0] + lhs_qr_qv * qvn - out_qrn[0, 0, 0] /= lhs_qr_qr - out_qin[0, 0, 0] -= ( - lhs_qi_qr * out_qrn[0, 0, 0] + lhs_qi_qs * out_qsn[0, 0, 0] + lhs_qi_qv * qvn - ) - out_qin[0, 0, 0] /= lhs_qi_qi - out_qln[0, 0, 0] -= ( - lhs_ql_qi * out_qin[0, 0, 0] - + lhs_ql_qr * out_qrn[0, 0, 0] - + lhs_ql_qs * out_qsn[0, 0, 0] - + lhs_ql_qv * qvn - ) - out_qln[0, 0, 0] /= lhs_ql_ql - - # ensure no small values (including negatives) remain in cloud variables - # nor precipitation rates - if out_qln[0, 0, 0] < EPSEC: - qvn += out_qln[0, 0, 0] - out_qln[0, 0, 0] = 0.0 - if out_qin[0, 0, 0] < EPSEC: - qvn += out_qin[0, 0, 0] - out_qin[0, 0, 0] = 0.0 - if out_qrn[0, 0, 0] < EPSEC: - qvn += out_qrn[0, 0, 0] - out_qrn[0, 0, 0] = 0.0 - if out_qsn[0, 0, 0] < EPSEC: - qvn += out_qsn[0, 0, 0] - out_qsn[0, 0, 0] = 0.0 - - # *** 5.3: precipitation/sedimentation fluxes to next level diagnostic precipitation fluxes - out_pfplsl[0, 0, 0] = fallsink_ql * out_qln[0, 0, 0] * rdtgdp - out_pfplsi[0, 0, 0] = fallsink_qi * out_qin[0, 0, 0] * rdtgdp - out_pfplsr[0, 0, 0] = fallsink_qr * out_qrn[0, 0, 0] * rdtgdp - out_pfplss[0, 0, 0] = fallsink_qs * out_qsn[0, 0, 0] * rdtgdp - pfplsv = fallsink_qv * qvn * rdtgdp - - # ensure precipitation fraction is zero if no precipitation - qpretot = out_pfplss[0, 0, 0] + out_pfplsr[0, 0, 0] - if qpretot < EPSEC: - tmp_covptot[0, 0] = 0.0 - - # === 6: update tendencies - # *** 6.1: temperature and CLV budgets - flux_ql = ( - psupsatsrce_ql - + convsrce_ql - + fallsrce_ql - - (fallsink_ql + convsink_ql) * out_qln[0, 0, 0] - ) - if __INLINED(PHASEQL == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qln[0, 0, 0] - ql - flux_ql) / dt - if __INLINED(PHASEQL == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qln[0, 0, 0] - ql - flux_ql) / dt - out_tnd_loc_ql[0, 0, 0] += (out_qln[0, 0, 0] - out_ql0[0, 0, 0]) / dt - - flux_qi = ( - psupsatsrce_qi - + convsrce_qi - + fallsrce_qi - - (fallsink_qi + convsink_qi) * out_qin[0, 0, 0] - ) - if __INLINED(PHASEQI == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qin[0, 0, 0] - qi - flux_qi) / dt - if __INLINED(PHASEQI == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qin[0, 0, 0] - qi - flux_qi) / dt - out_tnd_loc_qi[0, 0, 0] += (out_qin[0, 0, 0] - out_qi0[0, 0, 0]) / dt - - flux_qr = ( - psupsatsrce_qr - + convsrce_qr - + fallsrce_qr - - (fallsink_qr + convsink_qr) * out_qrn[0, 0, 0] - ) - if __INLINED(PHASEQR == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qrn[0, 0, 0] - qr - flux_qr) / dt - if __INLINED(PHASEQR == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qrn[0, 0, 0] - qr - flux_qr) / dt - out_tnd_loc_qr[0, 0, 0] += (out_qrn[0, 0, 0] - out_qr0[0, 0, 0]) / dt - - flux_qs = ( - psupsatsrce_qs - + convsrce_qs - + fallsrce_qs - - (fallsink_qs + convsink_qs) * out_qsn[0, 0, 0] - ) - if __INLINED(PHASEQS == 1): - out_tnd_loc_t[0, 0, 0] += RALVDCP * (out_qsn[0, 0, 0] - qs - flux_qs) / dt - if __INLINED(PHASEQS == 2): - out_tnd_loc_t[0, 0, 0] += RALSDCP * (out_qsn[0, 0, 0] - qs - flux_qs) / dt - out_tnd_loc_qs[0, 0, 0] += (out_qsn[0, 0, 0] - out_qs0[0, 0, 0]) / dt - - # *** 6.2: humidity budget - out_tnd_loc_qv[0, 0, 0] += (qvn - qv) / dt - - # *** 6.3: cloud cover - out_tnd_loc_a[0, 0, 0] += da / dt - - # --- copy precipitation fraction into output variable - out_covptot[0, 0, 0] = tmp_covptot[0, 0] - - -@stencil_collection("cloudsc_fluxes") -def cloudsc_fluxes( - in_aph: Field["float"], # staggered - in_foealfa: Field["float"], - in_lneg_qi: Field["float"], - in_lneg_ql: Field["float"], - in_lneg_qr: Field["float"], - in_lneg_qs: Field["float"], - in_lude: Field["float"], - in_pfplsi: Field["float"], - in_pfplsl: Field["float"], - in_pfplsr: Field["float"], - in_pfplss: Field["float"], - in_qi0: Field["float"], - in_qin: Field["float"], - in_ql0: Field["float"], - in_qln: Field["float"], - in_qr0: Field["float"], - in_qrn: Field["float"], - in_qs0: Field["float"], - in_qsn: Field["float"], - in_vfi: Field["float"], - in_vfl: Field["float"], - out_fcqlng: Field["float"], # staggered - out_fcqnng: Field["float"], # staggered - out_fcqrng: Field["float"], # staggered - out_fcqsng: Field["float"], # staggered - out_fhpsl: Field["float"], # staggered - out_fhpsn: Field["float"], # staggered - out_fplsl: Field["float"], # staggered - out_fplsn: Field["float"], # staggered - out_fsqif: Field["float"], # staggered - out_fsqitur: Field["float"], # staggered - out_fsqlf: Field["float"], # staggered - out_fsqltur: Field["float"], # staggered - out_fsqrf: Field["float"], # staggered - out_fsqsf: Field["float"], # staggered - *, - dt: "float", -): - from __externals__ import RG, RLSTT, RLVTT - - # === 7: flux/diagnostics computations - with computation(FORWARD): - with interval(0, 1): - out_fplsl[0, 0, 0] = 0.0 - out_fplsn[0, 0, 0] = 0.0 - out_fhpsl[0, 0, 0] = 0.0 - out_fhpsn[0, 0, 0] = 0.0 - out_fsqlf[0, 0, 0] = 0.0 - out_fsqif[0, 0, 0] = 0.0 - out_fsqrf[0, 0, 0] = 0.0 - out_fsqsf[0, 0, 0] = 0.0 - out_fcqlng[0, 0, 0] = 0.0 - out_fcqnng[0, 0, 0] = 0.0 - out_fcqrng[0, 0, 0] = 0.0 - out_fcqsng[0, 0, 0] = 0.0 - out_fsqltur[0, 0, 0] = 0.0 - out_fsqitur[0, 0, 0] = 0.0 - - with interval(1, None): - # --- copy general precip arrays back info PFP arrays for GRIB archiving - out_fplsl[0, 0, 0] = in_pfplsr[0, 0, -1] + in_pfplsl[0, 0, -1] - out_fplsn[0, 0, 0] = in_pfplss[0, 0, -1] + in_pfplsi[0, 0, -1] - - # --- enthalpy flux due to precipitation - out_fhpsl[0, 0, 0] = -RLVTT * out_fplsl[0, 0, 0] - out_fhpsn[0, 0, 0] = -RLSTT * out_fplsn[0, 0, 0] - - gdph_r = -(in_aph[0, 0, 0] - in_aph[0, 0, -1]) / (RG * dt) - out_fsqlf[0, 0, 0] = out_fsqlf[0, 0, -1] - out_fsqif[0, 0, 0] = out_fsqif[0, 0, -1] - out_fsqrf[0, 0, 0] = out_fsqlf[0, 0, -1] - out_fsqsf[0, 0, 0] = out_fsqif[0, 0, -1] - out_fcqlng[0, 0, 0] = out_fcqlng[0, 0, -1] - out_fcqnng[0, 0, 0] = out_fcqnng[0, 0, -1] - out_fcqrng[0, 0, 0] = out_fcqlng[0, 0, -1] - out_fcqsng[0, 0, 0] = out_fcqnng[0, 0, -1] - out_fsqltur[0, 0, 0] = out_fsqltur[0, 0, -1] - out_fsqitur[0, 0, 0] = out_fsqitur[0, 0, -1] - - # liquid, LS scheme minus detrainment - out_fsqlf[0, 0, 0] += ( - in_qln[0, 0, -1] - - in_ql0[0, 0, -1] - + in_vfl[0, 0, -1] * dt - - in_foealfa[0, 0, -1] * in_lude[0, 0, -1] - ) * gdph_r - # liquid, negative numbers - out_fcqlng[0, 0, 0] += in_lneg_ql[0, 0, -1] * gdph_r - # liquid, vertical diffusion - out_fsqltur[0, 0, 0] += in_vfl[0, 0, -1] * dt * gdph_r - - # rain, LS scheme - out_fsqrf[0, 0, 0] += (in_qrn[0, 0, -1] - in_qr0[0, 0, -1]) * gdph_r - # rain, negative numbers - out_fcqrng[0, 0, 0] += in_lneg_qr[0, 0, -1] * gdph_r - - # ice, LS scheme minus detrainment - out_fsqif[0, 0, 0] += ( - in_qin[0, 0, -1] - - in_qi0[0, 0, -1] - + in_vfi[0, 0, -1] * dt - - (1 - in_foealfa[0, 0, -1]) * in_lude[0, 0, -1] - ) * gdph_r - # ice, negative numbers - out_fcqnng[0, 0, 0] += in_lneg_qi[0, 0, -1] * gdph_r - # ice, vertical diffusion - out_fsqitur[0, 0, 0] += in_vfi[0, 0, -1] * dt * gdph_r - - # snow, LS scheme - out_fsqsf[0, 0, 0] += (in_qsn[0, 0, -1] - in_qs0[0, 0, -1]) * gdph_r - # snow, negative numbers - out_fcqsng[0, 0, 0] += in_lneg_qs[0, 0, -1] * gdph_r diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py deleted file mode 100644 index 27d11ca1..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/cuadjtq.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from gt4py.cartesian import gtscript - -from cloudsc4py.framework.stencil import function_collection -from cloudsc4py.physics._stencils.fcttre import f_foedem, f_foeewm, f_foeldcpm -from cloudsc4py.utils.f2py import ported_function - - -@function_collection("f_cuadjtq_5") -@gtscript.function -def f_cuadjtq_5(qp, qsmix, t): - from __externals__ import RETV - - qsat = min(f_foeewm(t) * qp, 0.5) - cor = 1 / (1 - RETV * qsat) - qsat *= cor - cond = (qsmix - qsat) / (1 + qsat * cor * f_foedem(t)) - t += f_foeldcpm(t) * cond - qsmix -= cond - return qsmix, t - - -@ported_function(from_file="cloudsc_fortran/cloudsc2.F90", from_line=1297, to_line=1314) -@function_collection("f_cuadjtq") -@gtscript.function -def f_cuadjtq(ap, qsmix, t): - qp = 1 / ap - qsmix, t = f_cuadjtq_5(qp, qsmix, t) - qsmix, t = f_cuadjtq_5(qp, qsmix, t) - return qsmix, t diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py deleted file mode 100644 index faeaa00a..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fccld.py +++ /dev/null @@ -1,25 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from gt4py.cartesian import gtscript - -from cloudsc4py.framework.stencil import function_collection -from cloudsc4py.physics._stencils.fcttre import f_foeeice, f_foeeliq -from cloudsc4py.utils.f2py import ported_function - - -@ported_function(from_file="common/include/fccld.func.h", from_line=26, to_line=27) -@function_collection("f_fokoop") -@gtscript.function -def f_fokoop(t): - from __externals__ import RKOOP1, RKOOP2 - - return min(RKOOP1 - RKOOP2 * t, f_foeeliq(t) / f_foeeice(t)) diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py deleted file mode 100644 index ca966aff..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/fcttre.py +++ /dev/null @@ -1,83 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from gt4py.cartesian import gtscript - -from cloudsc4py.framework.stencil import function_collection -from cloudsc4py.utils.f2py import ported_function - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=39, to_line=41) -@function_collection("f_foedelta") -@gtscript.function -def f_foedelta(t): - from __externals__ import RTT - - return 1 if t > RTT else 0 - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=82, to_line=84) -@function_collection("f_foealfa") -@gtscript.function -def f_foealfa(t): - from __externals__ import RTICE, RTWAT, RTWAT_RTICE_R - - return min(1.0, ((max(RTICE, min(RTWAT, t)) - RTICE) * RTWAT_RTICE_R) ** 2) - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=89, to_line=92) -@function_collection("f_foeewm") -@gtscript.function -def f_foeewm(t): - from __externals__ import R2ES, R3IES, R3LES, R4IES, R4LES, RTT - - return R2ES * ( - f_foealfa(t) * exp(R3LES * (t - RTT) / (t - R4LES)) - + (1 - f_foealfa(t)) * (exp(R3IES * (t - RTT) / (t - R4IES))) - ) - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=100, to_line=101) -@function_collection("f_foedem") -@gtscript.function -def f_foedem(t): - from __externals__ import R4IES, R4LES, R5ALSCP, R5ALVCP - - return f_foealfa(t) * R5ALVCP * (1 / (t - R4LES) ** 2) + (1 - f_foealfa(t)) * R5ALSCP * ( - 1 / (t - R4IES) ** 2 - ) - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=103, to_line=104) -@function_collection("f_foeldcpm") -@gtscript.function -def f_foeldcpm(t): - from __externals__ import RALSDCP, RALVDCP - - return f_foealfa(t) * RALVDCP + (1 - f_foealfa(t)) * RALSDCP - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=161, to_line=164) -@function_collection("f_foeeliq") -@gtscript.function -def f_foeeliq(t): - from __externals__ import R2ES, R3LES, R4LES, RTT - - return R2ES * exp(R3LES * (t - RTT) / (t - R4LES)) - - -@ported_function(from_file="common/include/fcttre.func.h", from_line=161, to_line=164) -@function_collection("f_foeeice") -@gtscript.function -def f_foeeice(t): - from __externals__ import R2ES, R3IES, R4IES, RTT - - return R2ES * exp(R3IES * (t - RTT) / (t - R4IES)) diff --git a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py b/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py deleted file mode 100644 index 00cb8f3f..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/_stencils/helpers.py +++ /dev/null @@ -1,269 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from gt4py.cartesian import gtscript - -from cloudsc4py.framework.stencil import function_collection - - -@function_collection("f_helper_0") -@gtscript.function -def f_helper_0( - order, - index1_ql, - index1_qi, - index1_qr, - index1_qs, - index1_qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, -): - minimum = 1e32 - - if index1_ql and ratio_ql < minimum: - order = 1 - minimum = ratio_ql - if index1_qi and ratio_qi < minimum: - order = 2 - minimum = ratio_qi - if index1_qr and ratio_qr < minimum: - order = 3 - minimum = ratio_qr - if index1_qs and ratio_qs < minimum: - order = 4 - minimum = ratio_qs - if index1_qv and ratio_qv < minimum: - order = 0 - - if order == 1: - index1_ql = False - if order == 2: - index1_qi = False - if order == 3: - index1_qr = False - if order == 4: - index1_qs = False - if order == 0: - index1_qv = False - - return order, index1_ql, index1_qi, index1_qr, index1_qs, index1_qv - - -@function_collection("f_helper_1") -@gtscript.function -def f_helper_1( - order, - index3_ql_ql, - index3_ql_qi, - index3_ql_qr, - index3_ql_qs, - index3_ql_qv, - index3_qi_ql, - index3_qi_qi, - index3_qi_qr, - index3_qi_qs, - index3_qi_qv, - index3_qr_ql, - index3_qr_qi, - index3_qr_qr, - index3_qr_qs, - index3_qr_qv, - index3_qs_ql, - index3_qs_qi, - index3_qs_qr, - index3_qs_qs, - index3_qs_qv, - index3_qv_ql, - index3_qv_qi, - index3_qv_qr, - index3_qv_qs, - index3_qv_qv, - ql, - qi, - qr, - qs, - qv, - ratio_ql, - ratio_qi, - ratio_qr, - ratio_qs, - ratio_qv, - sinksum_ql, - sinksum_qi, - sinksum_qr, - sinksum_qs, - sinksum_qv, - solqa_ql_ql, - solqa_ql_qi, - solqa_ql_qr, - solqa_ql_qs, - solqa_ql_qv, - solqa_qi_ql, - solqa_qi_qi, - solqa_qi_qr, - solqa_qi_qs, - solqa_qi_qv, - solqa_qr_ql, - solqa_qr_qi, - solqa_qr_qr, - solqa_qr_qs, - solqa_qr_qv, - solqa_qs_ql, - solqa_qs_qi, - solqa_qs_qr, - solqa_qs_qs, - solqa_qs_qv, - solqa_qv_ql, - solqa_qv_qi, - solqa_qv_qr, - solqa_qv_qs, - solqa_qv_qv, -): - from __externals__ import EPSEC - - # recalculate sum and scaling factor - if order == 1: - index3_ql_ql = solqa_ql_ql < 0.0 - index3_ql_qi = solqa_ql_qi < 0.0 - index3_ql_qr = solqa_ql_qr < 0.0 - index3_ql_qs = solqa_ql_qs < 0.0 - index3_ql_qv = solqa_ql_qv < 0.0 - sinksum_ql -= solqa_ql_ql + solqa_ql_qi + solqa_ql_qr + solqa_ql_qs + solqa_ql_qv - mm = max(ql, EPSEC) - rr = max(sinksum_ql, mm) - ratio_ql = mm / rr - elif order == 2: - index3_qi_ql = solqa_qi_ql < 0.0 - index3_qi_qi = solqa_qi_qi < 0.0 - index3_qi_qr = solqa_qi_qr < 0.0 - index3_qi_qs = solqa_qi_qs < 0.0 - index3_qi_qv = solqa_qi_qv < 0.0 - sinksum_qi -= solqa_qi_ql + solqa_qi_qi + solqa_qi_qr + solqa_qi_qs + solqa_qi_qv - mm = max(qi, EPSEC) - rr = max(sinksum_qi, mm) - ratio_qi = mm / rr - elif order == 3: - index3_qr_ql = solqa_qr_ql < 0.0 - index3_qr_qi = solqa_qr_qi < 0.0 - index3_qr_qr = solqa_qr_qr < 0.0 - index3_qr_qs = solqa_qr_qs < 0.0 - index3_qr_qv = solqa_qr_qv < 0.0 - sinksum_qr -= solqa_qr_ql + solqa_qr_qi + solqa_qr_qr + solqa_qr_qs + solqa_qr_qv - mm = max(qr, EPSEC) - rr = max(sinksum_qr, mm) - ratio_qr = mm / rr - elif order == 4: - index3_qs_ql = solqa_qs_ql < 0.0 - index3_qs_qi = solqa_qs_qi < 0.0 - index3_qs_qr = solqa_qs_qr < 0.0 - index3_qs_qs = solqa_qs_qs < 0.0 - index3_qs_qv = solqa_qs_qv < 0.0 - sinksum_qs -= solqa_qs_ql + solqa_qs_qi + solqa_qs_qr + solqa_qs_qs + solqa_qs_qv - mm = max(qs, EPSEC) - rr = max(sinksum_qs, mm) - ratio_qs = mm / rr - elif order == 0: - index3_qv_ql = solqa_qv_ql < 0.0 - index3_qv_qi = solqa_qv_qi < 0.0 - index3_qv_qr = solqa_qv_qr < 0.0 - index3_qv_qs = solqa_qv_qs < 0.0 - index3_qv_qv = solqa_qv_qv < 0.0 - sinksum_qv -= solqa_qv_ql + solqa_qv_qi + solqa_qv_qr + solqa_qv_qs + solqa_qv_qv - mm = max(qv, EPSEC) - rr = max(sinksum_qv, mm) - ratio_qv = mm / rr - - # scale - if order == 1: - if index3_ql_ql: - solqa_ql_ql *= ratio_ql - solqa_ql_ql *= ratio_ql - if index3_ql_qi: - solqa_ql_qi *= ratio_ql - solqa_qi_ql *= ratio_ql - if index3_ql_qr: - solqa_ql_qr *= ratio_ql - solqa_qr_ql *= ratio_ql - if index3_ql_qs: - solqa_ql_qs *= ratio_ql - solqa_qs_ql *= ratio_ql - if index3_ql_qv: - solqa_ql_qv *= ratio_ql - solqa_qv_ql *= ratio_ql - elif order == 2: - if index3_qi_ql: - solqa_qi_ql *= ratio_qi - solqa_ql_qi *= ratio_qi - if index3_qi_qi: - solqa_qi_qi *= ratio_qi - solqa_qi_qi *= ratio_qi - if index3_qi_qr: - solqa_qi_qr *= ratio_qi - solqa_qr_qi *= ratio_qi - if index3_qi_qs: - solqa_qi_qs *= ratio_qi - solqa_qs_qi *= ratio_qi - if index3_qi_qv: - solqa_qi_qv *= ratio_qi - solqa_qv_qi *= ratio_qi - elif order == 3: - if index3_qr_ql: - solqa_qr_ql *= ratio_qr - solqa_ql_qr *= ratio_qr - if index3_qr_qi: - solqa_qr_qi *= ratio_qr - solqa_qi_qr *= ratio_qr - if index3_qr_qr: - solqa_qr_qr *= ratio_qr - solqa_qr_qr *= ratio_qr - if index3_qr_qs: - solqa_qr_qs *= ratio_qr - solqa_qs_qr *= ratio_qr - if index3_qr_qv: - solqa_qr_qv *= ratio_qr - solqa_qv_qr *= ratio_qr - elif order == 4: - if index3_qs_ql: - solqa_qs_ql *= ratio_qs - solqa_ql_qs *= ratio_qs - if index3_qs_qi: - solqa_qs_qi *= ratio_qs - solqa_qi_qs *= ratio_qs - if index3_qs_qr: - solqa_qs_qr *= ratio_qs - solqa_qr_qs *= ratio_qs - if index3_qs_qs: - solqa_qs_qs *= ratio_qs - solqa_qs_qs *= ratio_qs - if index3_qs_qv: - solqa_qs_qv *= ratio_qs - solqa_qv_qs *= ratio_qs - elif order == 0: - if index3_qv_ql: - solqa_qv_ql *= ratio_qv - solqa_ql_qv *= ratio_qv - if index3_qv_qi: - solqa_qv_qi *= ratio_qv - solqa_qi_qv *= ratio_qv - if index3_qv_qr: - solqa_qv_qr *= ratio_qv - solqa_qr_qv *= ratio_qv - if index3_qv_qs: - solqa_qv_qs *= ratio_qv - solqa_qs_qv *= ratio_qv - if index3_qv_qv: - solqa_qv_qv *= ratio_qv - solqa_qv_qv *= ratio_qv - - return ratio_ql, ratio_qi, ratio_qr, ratio_qs, ratio_qv diff --git a/src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py b/src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py deleted file mode 100644 index f941c5bc..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/cloudsc.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from functools import cached_property -from itertools import repeat -import numpy as np -import sys -from typing import TYPE_CHECKING - -from cloudsc4py.framework.components import ImplicitTendencyComponent -from cloudsc4py.framework.grid import I, J, K -from cloudsc4py.framework.storage import managed_temporary_storage -from cloudsc4py.utils.numpyx import assign - -if TYPE_CHECKING: - from datetime import timedelta - from typing import Dict - - from sympl._core.typingx import PropertyDict - - from cloudsc4py.framework.config import GT4PyConfig - from cloudsc4py.framework.grid import ComputationalGrid - from cloudsc4py.utils.iox import ( - YoecldpParameters, - YoethfParameters, - YomcstParameters, - YrecldpParameters, - ) - from cloudsc4py.utils.typingx import StorageDict - - -class Cloudsc(ImplicitTendencyComponent): - def __init__( - self, - computational_grid: ComputationalGrid, - yoecldp_parameters: YoecldpParameters, - yoethf_parameters: YoethfParameters, - yomcst_parameters: YomcstParameters, - yrecldp_parameters: YrecldpParameters, - *, - enable_checks: bool = True, - gt4py_config: GT4PyConfig, - ) -> None: - super().__init__(computational_grid, enable_checks=enable_checks, gt4py_config=gt4py_config) - - self.nlev = self.computational_grid.grids[I, J, K].shape[2] - externals = {} - externals.update(yoecldp_parameters.dict()) - externals.update(yoethf_parameters.dict()) - externals.update(yomcst_parameters.dict()) - externals.update(yrecldp_parameters.dict()) - externals.update( - { - "DEPICE": 1, - "EPSEC": 1e-14, - "EPSILON": 100 * sys.float_info.epsilon, - "EVAPRAIN": 2, - "EVAPSNOW": 1, - "FALLQV": False, - "FALLQL": False, - "FALLQI": False, - "FALLQR": True, - "FALLQS": True, - "MELTQV": -99, - "MELTQL": yoecldp_parameters.NCLDQI, - "MELTQI": yoecldp_parameters.NCLDQR, - "MELTQR": yoecldp_parameters.NCLDQS, - "MELTQS": yoecldp_parameters.NCLDQR, - "NLEV": self.nlev, - "PHASEQV": 0, - "PHASEQL": 1, - "PHASEQI": 2, - "PHASEQR": 1, - "PHASEQS": 2, - "RDCP": yomcst_parameters.RD / yomcst_parameters.RCPD, - "RLDCP": 1 / (yoethf_parameters.RALSDCP - yoethf_parameters.RALVDCP), - "TW1": 1329.31, - "TW2": 0.0074615, - "TW3": 0.85e5, - "TW4": 40.637, - "TW5": 275.0, - "VQV": 0.0, - "VQL": 0.0, - "VQI": yrecldp_parameters.RVICE, - "VQR": yrecldp_parameters.RVRAIN, - "VQS": yrecldp_parameters.RVSNOW, - "WARMRAIN": 2, - } - ) - - self.cloudsc = self.compile_stencil("cloudsc", externals) - - @cached_property - def _input_properties(self) -> PropertyDict: - # todo(stubbiali): sort out units - return { - "b_convection_on": {"grid": (I, J), "units": ""}, - "f_a": {"grid": (I, J, K), "units": ""}, - "f_ap": {"grid": (I, J, K), "units": ""}, - "f_aph": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_ccn": {"grid": (I, J, K), "units": ""}, - "f_hrlw": {"grid": (I, J, K), "units": ""}, - "f_hrsw": {"grid": (I, J, K), "units": ""}, - "f_icrit_aer": {"grid": (I, J, K), "units": ""}, - "f_lcrit_aer": {"grid": (I, J, K), "units": ""}, - "f_lsm": {"grid": (I, J), "units": ""}, - "f_lu": {"grid": (I, J, K), "units": ""}, - "f_lude": {"grid": (I, J, K), "units": ""}, - "f_mfd": {"grid": (I, J, K), "units": ""}, - "f_mfu": {"grid": (I, J, K), "units": ""}, - "f_nice": {"grid": (I, J, K), "units": ""}, - "f_qi": {"grid": (I, J, K), "units": ""}, - "f_ql": {"grid": (I, J, K), "units": ""}, - "f_qr": {"grid": (I, J, K), "units": ""}, - "f_qs": {"grid": (I, J, K), "units": ""}, - "f_qv": {"grid": (I, J, K), "units": ""}, - "f_re_ice": {"grid": (I, J, K), "units": ""}, - "f_snde": {"grid": (I, J, K), "units": ""}, - "f_supsat": {"grid": (I, J, K), "units": ""}, - "f_t": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_a": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qi": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_ql": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qr": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qs": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qv": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_t": {"grid": (I, J, K), "units": ""}, - "f_vfi": {"grid": (I, J, K), "units": ""}, - "f_vfl": {"grid": (I, J, K), "units": ""}, - "f_w": {"grid": (I, J, K), "units": ""}, - "i_convection_type": {"grid": (I, J), "units": ""}, - } - - @cached_property - def _tendency_properties(self) -> PropertyDict: - # todo(stubbiali): sort out units - return { - "f_a": {"grid": (I, J, K), "units": "s^-1"}, - "f_t": {"grid": (I, J, K), "units": "s^-1"}, - "f_qv": {"grid": (I, J, K), "units": "s^-1"}, - "f_ql": {"grid": (I, J, K), "units": "s^-1"}, - "f_qi": {"grid": (I, J, K), "units": "s^-1"}, - "f_qr": {"grid": (I, J, K), "units": "s^-1"}, - "f_qs": {"grid": (I, J, K), "units": "s^-1"}, - } - - @cached_property - def _diagnostic_properties(self) -> PropertyDict: - # todo(stubbiali): sort out units - return { - "f_covptot": {"grid": (I, J, K), "units": ""}, - "f_fcqlng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fcqnng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fcqrng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fcqsng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fhpsl": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fhpsn": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fplsl": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fplsn": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqif": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqitur": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqlf": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqltur": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqrf": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqsf": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_rainfrac_toprfz": {"grid": (I, J), "units": ""}, - } - - def array_call( - self, - state: StorageDict, - timestep: timedelta, - out_tendencies: StorageDict, - out_diagnostics: StorageDict, - overwrite_tendencies: Dict[str, bool], - ) -> None: - with managed_temporary_storage( - self.computational_grid, - *repeat(((I, J), "float"), 6), - ((I, J), "bool"), - ((K,), "int"), - gt4py_config=self.gt4py_config, - ) as (aph_s, cldtopdist, covpmax, covptot, paphd, trpaus, rainliq, klevel): - inputs = { - "in_" + name.split("_", maxsplit=1)[1]: state[name] - for name in self.input_properties - } - tendencies = { - "out_tnd_loc_" + name.split("_", maxsplit=1)[1]: out_tendencies[name] - for name in self.tendency_properties - } - diagnostics = { - "out_" + name.split("_", maxsplit=1)[1]: out_diagnostics[name] - for name in self.diagnostic_properties - } - temporaries = { - "tmp_aph_s": aph_s, - "tmp_cldtopdist": cldtopdist, - "tmp_covpmax": covpmax, - "tmp_covptot": covptot, - "tmp_klevel": klevel, - "tmp_paphd": paphd, - "tmp_rainliq": rainliq, - "tmp_trpaus": trpaus, - } - aph_s[...] = state["f_aph"][..., self.nlev] - assign(klevel, np.arange(self.nlev + 1)) - self.cloudsc( - **inputs, - **tendencies, - **diagnostics, - **temporaries, - dt=timestep.total_seconds(), - origin=(0, 0, 0), - domain=self.computational_grid.grids[I, J, K - 1 / 2].shape, - validate_args=self.gt4py_config.validate_args, - exec_info=self.gt4py_config.exec_info, - ) diff --git a/src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py b/src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py deleted file mode 100644 index d29ea96d..00000000 --- a/src/cloudsc_python/src/cloudsc4py/physics/cloudsc_split.py +++ /dev/null @@ -1,318 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from functools import cached_property -from itertools import repeat -import numpy as np -import sys -from typing import TYPE_CHECKING - -from cloudsc4py.framework.components import ImplicitTendencyComponent -from cloudsc4py.framework.grid import I, J, K -from cloudsc4py.framework.storage import managed_temporary_storage -from cloudsc4py.utils.numpyx import assign - -if TYPE_CHECKING: - from datetime import timedelta - from typing import Dict - - from sympl._core.typingx import PropertyDict - - from cloudsc4py.framework.config import GT4PyConfig - from cloudsc4py.framework.grid import ComputationalGrid - from cloudsc4py.utils.iox import ( - YoecldpParameters, - YoethfParameters, - YomcstParameters, - YrecldpParameters, - ) - from cloudsc4py.utils.typingx import StorageDict - - -class Cloudsc(ImplicitTendencyComponent): - def __init__( - self, - computational_grid: ComputationalGrid, - yoecldp_parameters: YoecldpParameters, - yoethf_parameters: YoethfParameters, - yomcst_parameters: YomcstParameters, - yrecldp_parameters: YrecldpParameters, - *, - enable_checks: bool = True, - gt4py_config: GT4PyConfig, - ) -> None: - super().__init__(computational_grid, enable_checks=enable_checks, gt4py_config=gt4py_config) - - self.nlev = self.computational_grid.grids[I, J, K].shape[2] - externals = {} - externals.update(yoecldp_parameters.dict()) - externals.update(yoethf_parameters.dict()) - externals.update(yomcst_parameters.dict()) - externals.update(yrecldp_parameters.dict()) - externals.update( - { - "DEPICE": 1, - "EPSEC": 1e-14, - "EPSILON": 100 * sys.float_info.epsilon, - "EVAPRAIN": 2, - "EVAPSNOW": 1, - "FALLQV": False, - "FALLQL": False, - "FALLQI": False, - "FALLQR": True, - "FALLQS": True, - "MELTQV": -99, - "MELTQL": yoecldp_parameters.NCLDQI, - "MELTQI": yoecldp_parameters.NCLDQR, - "MELTQR": yoecldp_parameters.NCLDQS, - "MELTQS": yoecldp_parameters.NCLDQR, - "NLEV": self.nlev, - "PHASEQV": 0, - "PHASEQL": 1, - "PHASEQI": 2, - "PHASEQR": 1, - "PHASEQS": 2, - "RDCP": yomcst_parameters.RD / yomcst_parameters.RCPD, - "RLDCP": 1 / (yoethf_parameters.RALSDCP - yoethf_parameters.RALVDCP), - "TW1": 1329.31, - "TW2": 0.0074615, - "TW3": 0.85e5, - "TW4": 40.637, - "TW5": 275.0, - "VQV": 0.0, - "VQL": 0.0, - "VQI": yrecldp_parameters.RVICE, - "VQR": yrecldp_parameters.RVRAIN, - "VQS": yrecldp_parameters.RVSNOW, - "WARMRAIN": 2, - } - ) - - self.cloudsc_tendencies = self.compile_stencil("cloudsc_tendencies", externals) - self.cloudsc_fluxes = self.compile_stencil("cloudsc_fluxes", externals) - - @cached_property - def _input_properties(self) -> PropertyDict: - # todo(stubbiali): sort out units - return { - "b_convection_on": {"grid": (I, J), "units": ""}, - "f_a": {"grid": (I, J, K), "units": ""}, - "f_ap": {"grid": (I, J, K), "units": ""}, - "f_aph": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_ccn": {"grid": (I, J, K), "units": ""}, - "f_hrlw": {"grid": (I, J, K), "units": ""}, - "f_hrsw": {"grid": (I, J, K), "units": ""}, - "f_icrit_aer": {"grid": (I, J, K), "units": ""}, - "f_lcrit_aer": {"grid": (I, J, K), "units": ""}, - "f_lsm": {"grid": (I, J), "units": ""}, - "f_lu": {"grid": (I, J, K), "units": ""}, - "f_lude": {"grid": (I, J, K), "units": ""}, - "f_mfd": {"grid": (I, J, K), "units": ""}, - "f_mfu": {"grid": (I, J, K), "units": ""}, - "f_nice": {"grid": (I, J, K), "units": ""}, - "f_qi": {"grid": (I, J, K), "units": ""}, - "f_ql": {"grid": (I, J, K), "units": ""}, - "f_qr": {"grid": (I, J, K), "units": ""}, - "f_qs": {"grid": (I, J, K), "units": ""}, - "f_qv": {"grid": (I, J, K), "units": ""}, - "f_re_ice": {"grid": (I, J, K), "units": ""}, - "f_snde": {"grid": (I, J, K), "units": ""}, - "f_supsat": {"grid": (I, J, K), "units": ""}, - "f_t": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_a": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qi": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_ql": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qr": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qs": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_qv": {"grid": (I, J, K), "units": ""}, - "f_tnd_tmp_t": {"grid": (I, J, K), "units": ""}, - "f_vfi": {"grid": (I, J, K), "units": ""}, - "f_vfl": {"grid": (I, J, K), "units": ""}, - "f_w": {"grid": (I, J, K), "units": ""}, - "i_convection_type": {"grid": (I, J), "units": ""}, - } - - @cached_property - def _tendency_properties(self) -> PropertyDict: - # todo(stubbiali): sort out units - return { - "f_a": {"grid": (I, J, K), "units": "s^-1"}, - "f_t": {"grid": (I, J, K), "units": "s^-1"}, - "f_qv": {"grid": (I, J, K), "units": "s^-1"}, - "f_ql": {"grid": (I, J, K), "units": "s^-1"}, - "f_qi": {"grid": (I, J, K), "units": "s^-1"}, - "f_qr": {"grid": (I, J, K), "units": "s^-1"}, - "f_qs": {"grid": (I, J, K), "units": "s^-1"}, - } - - @cached_property - def _diagnostic_properties(self) -> PropertyDict: - # todo(stubbiali): sort out units - return { - "f_covptot": {"grid": (I, J, K), "units": ""}, - "f_fcqlng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fcqnng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fcqrng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fcqsng": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fhpsl": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fhpsn": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fplsl": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fplsn": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqif": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqitur": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqlf": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqltur": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqrf": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_fsqsf": {"grid": (I, J, K - 1 / 2), "units": ""}, - "f_rainfrac_toprfz": {"grid": (I, J), "units": ""}, - } - - def array_call( - self, - state: StorageDict, - timestep: timedelta, - out_tendencies: StorageDict, - out_diagnostics: StorageDict, - overwrite_tendencies: Dict[str, bool], - ) -> None: - with managed_temporary_storage( - self.computational_grid, - *repeat(((I, J), "float"), 6), - ((I, J), "bool"), - ((K,), "int"), - *repeat(((I, J, K), "float"), 18), - gt4py_config=self.gt4py_config, - ) as ( - aph_s, - cldtopdist, - covpmax, - covptot, - paphd, - trpaus, - rainliq, - klevel, - foealfa, - lneg_qi, - lneg_ql, - lneg_qr, - lneg_qs, - lude, - pfplsi, - pfplsl, - pfplsr, - pfplss, - qi0, - qin, - ql0, - qln, - qr0, - qrn, - qs0, - qsn, - ): - inputs = { - "in_" + name.split("_", maxsplit=1)[1]: state[name] - for name in self.input_properties - } - tendencies = { - "out_tnd_loc_" + name.split("_", maxsplit=1)[1]: out_tendencies[name] - for name in self.tendency_properties - } - diagnostics = { - "out_" + name.split("_", maxsplit=1)[1]: out_diagnostics[name] - for name in self.diagnostic_properties - } - temporaries = { - "tmp_aph_s": aph_s, - "tmp_cldtopdist": cldtopdist, - "tmp_covpmax": covpmax, - "tmp_covptot": covptot, - "tmp_klevel": klevel, - "tmp_paphd": paphd, - "tmp_rainliq": rainliq, - "tmp_trpaus": trpaus, - } - aph_s[...] = state["f_aph"][..., self.nlev] - assign(klevel, np.arange(self.nlev + 1)) - - inputs1 = inputs.copy() - vfi = inputs1.pop("in_vfi") - vfl = inputs1.pop("in_vfl") - diagnostics1 = { - "out_covptot": diagnostics["out_covptot"], - "out_foealfa": foealfa, - "out_lneg_qi": lneg_qi, - "out_lneg_ql": lneg_ql, - "out_lneg_qr": lneg_qr, - "out_lneg_qs": lneg_qs, - "out_lude": lude, - "out_pfplsi": pfplsi, - "out_pfplsl": pfplsl, - "out_pfplsr": pfplsr, - "out_pfplss": pfplss, - "out_qi0": qi0, - "out_qin": qin, - "out_ql0": ql0, - "out_qln": qln, - "out_qr0": qr0, - "out_qrn": qrn, - "out_qs0": qs0, - "out_qsn": qsn, - "out_rainfrac_toprfz": diagnostics["out_rainfrac_toprfz"], - } - self.cloudsc_tendencies( - **inputs1, - **tendencies, - **diagnostics1, - **temporaries, - dt=timestep.total_seconds(), - origin=(0, 0, 0), - domain=self.computational_grid.grids[I, J, K].shape, - validate_args=self.gt4py_config.validate_args, - exec_info=self.gt4py_config.exec_info, - ) - - inputs2 = { - "in_aph": inputs["in_aph"], - "in_foealfa": foealfa, - "in_lneg_qi": lneg_qi, - "in_lneg_ql": lneg_ql, - "in_lneg_qr": lneg_qr, - "in_lneg_qs": lneg_qs, - "in_lude": lude, - "in_pfplsi": pfplsi, - "in_pfplsl": pfplsl, - "in_pfplsr": pfplsr, - "in_pfplss": pfplss, - "in_qi0": qi0, - "in_qin": qin, - "in_ql0": ql0, - "in_qln": qln, - "in_qr0": qr0, - "in_qrn": qrn, - "in_qs0": qs0, - "in_qsn": qsn, - "in_vfi": vfi, - "in_vfl": vfl, - } - outputs2 = diagnostics.copy() - outputs2.pop("out_covptot") - outputs2.pop("out_rainfrac_toprfz") - self.cloudsc_fluxes( - **inputs2, - **outputs2, - dt=timestep.total_seconds(), - origin=(0, 0, 0), - domain=self.computational_grid.grids[I, J, K - 1 / 2].shape, - validate_args=self.gt4py_config.validate_args, - exec_info=self.gt4py_config.exec_info, - ) diff --git a/src/cloudsc_python/src/cloudsc4py/utils/__init__.py b/src/cloudsc_python/src/cloudsc4py/utils/__init__.py deleted file mode 100644 index 95e3c8ad..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. diff --git a/src/cloudsc_python/src/cloudsc4py/utils/f2py.py b/src/cloudsc_python/src/cloudsc4py/utils/f2py.py deleted file mode 100644 index f8ad6b9d..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/f2py.py +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Callable, Sequence - from typing import Optional, Union - - -PORTED_OBJECTS = {} - - -def ported_object( - handle: Optional[Callable] = None, - from_file: Optional[Union[str, Sequence[str]]] = None, - from_line: Optional[int] = None, - to_line: Optional[int] = None, -) -> Callable: - if from_line is not None and to_line is not None: - assert from_line <= to_line - - def core(obj): - PORTED_OBJECTS[obj.__name__] = obj - setattr(obj, "from_file", from_file) - setattr(obj, "from_line", from_line) - setattr(obj, "to_line", to_line) - return obj - - if handle is not None: - return core(handle) - else: - return core - - -# convenient aliases to improve readability -ported_class = ported_object -ported_function = ported_object -ported_method = ported_object diff --git a/src/cloudsc_python/src/cloudsc4py/utils/iox.py b/src/cloudsc_python/src/cloudsc4py/utils/iox.py deleted file mode 100644 index 53307c1b..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/iox.py +++ /dev/null @@ -1,328 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from datetime import timedelta -from functools import lru_cache -import h5py -import numpy as np -from pydantic import BaseModel -from typing import TYPE_CHECKING - -from cloudsc4py.utils.f2py import ported_method - -if TYPE_CHECKING: - from collections.abc import Callable - from typing import Optional, Type - - from cloudsc4py.framework.config import DataTypes - - -class YoecldpParameters(BaseModel): - NCLDQI: int - NCLDQL: int - NCLDQR: int - NCLDQS: int - NCLDQV: int - NCLV: int - - -class YoethfParameters(BaseModel): - R2ES: float - R3IES: float - R3LES: float - R4IES: float - R4LES: float - R5ALSCP: float - R5ALVCP: float - R5IES: float - R5LES: float - RALFDCP: float - RALSDCP: float - RALVDCP: float - RKOOP1: float - RKOOP2: float - RTICE: float - RTICECU: float - RTWAT: float - RTWAT_RTICECU_R: float - RTWAT_RTICE_R: float - - -class YomcstParameters(BaseModel): - RCPD: float - RD: float - RETV: float - RG: float - RLMLT: float - RLSTT: float - RLVTT: float - RTT: float - RV: float - - -class YrecldpParameters(BaseModel): - LAERICEAUTO: bool - LAERICESED: bool - LAERLIQAUTOCP: bool - LAERLIQAUTOCPB: bool - LAERLIQAUTOLSP: bool - LAERLIQCOLL: bool - LCLDBUDGET: bool - LCLDEXTRA: bool - NAECLBC: int - NAECLDU: int - NAECLOM: int - NAECLSS: int - NAECLSU: int - NAERCLD: int - NBETA: int - NCLDDIAG: int - NCLDTOP: int - NSHAPEP: int - NSHAPEQ: int - NSSOPT: int - RAMID: float - RAMIN: float - RCCN: float - RCCNOM: float - RCCNSS: float - RCCNSU: float - RCLCRIT: float - RCLCRIT_LAND: float - RCLCRIT_SEA: float - RCLDIFF: float - RCLDIFF_CONVI: float - RCLDMAX: float - RCLDTOPCF: float - RCLDTOPP: float - RCL_AI: float - RCL_APB1: float - RCL_APB2: float - RCL_APB3: float - RCL_AR: float - RCL_AS: float - RCL_BI: float - RCL_BR: float - RCL_BS: float - RCL_CDENOM1: float - RCL_CDENOM2: float - RCL_CDENOM3: float - RCL_CI: float - RCL_CONST1I: float - RCL_CONST1R: float - RCL_CONST1S: float - RCL_CONST2I: float - RCL_CONST2R: float - RCL_CONST2S: float - RCL_CONST3I: float - RCL_CONST3R: float - RCL_CONST3S: float - RCL_CONST4I: float - RCL_CONST4R: float - RCL_CONST4S: float - RCL_CONST5I: float - RCL_CONST5R: float - RCL_CONST5S: float - RCL_CONST6I: float - RCL_CONST6R: float - RCL_CONST6S: float - RCL_CONST7S: float - RCL_CONST8S: float - RCL_CR: float - RCL_CS: float - RCL_DI: float - RCL_DR: float - RCL_DS: float - RCL_DYNVISC: float - RCL_FAC1: float - RCL_FAC2: float - RCL_FZRAB: float - RCL_FZRBB: float - RCL_KA273: float - RCL_KKAac: float - RCL_KKAau: float - RCL_KKBac: float - RCL_KKBaun: float - RCL_KKBauq: float - RCL_KK_cloud_num_land: float - RCL_KK_cloud_num_sea: float - RCL_SCHMIDT: float - RCL_X1I: float - RCL_X1R: float - RCL_X1S: float - RCL_X2I: float - RCL_X2R: float - RCL_X2S: float - RCL_X3I: float - RCL_X3S: float - RCL_X41: float - RCL_X4R: float - RCL_X4S: float - RCOVPMIN: float - RDENSREF: float - RDENSWAT: float - RDEPLIQREFDEPTH: float - RDEPLIQREFRATE: float - RICEHI1: float - RICEHI2: float - RICEINIT: float - RKCONV: float - RKOOPTAU: float - RLCRITSNOW: float - RLMIN: float - RNICE: float - RPECONS: float - RPRC1: float - RPRC2: float - RPRECRHMAX: float - RSNOWLIN1: float - RSNOWLIN2: float - RTAUMEL: float - RTHOMO: float - RVICE: float - RVRAIN: float - RVRFACTOR: float - RVSNOW: float - - -class HDF5Reader: - f: h5py.File - data_types: DataTypes - - def __init__(self, filename: str, data_types: DataTypes) -> None: - self.f = h5py.File(filename) - self.data_types = data_types - - def __del__(self) -> None: - self.f.close() - - def get_field(self, name: str) -> np.ndarray: - ds = self.f.get(name, None) - if ds is None: - raise RuntimeError(f"Unknown field `{name}`.") - - if ds.ndim == 1: - return self._get_field_1d(ds, name) - elif ds.ndim == 2: - return self._get_field_2d(ds, name) - elif ds.ndim == 3: - return self._get_field_3d(ds, name) - else: - raise RuntimeError(f"The field `{name}` has unexpected shape {ds.shape}.") - - @lru_cache - def get_nlev(self) -> int: - return self.f["KLEV"][0] - - @lru_cache - def get_nlon(self) -> int: - return self.f["KLON"][0] - - def get_timestep(self) -> timedelta: - return timedelta(seconds=float(self._get_parameter_f("PTSPHY"))) - - @ported_method(from_file="common/module/yoecldp.F90", from_line=86, to_line=91) - def get_yoecldp_parameters(self) -> YoecldpParameters: - return YoecldpParameters( - **{"NCLV": 5, "NCLDQL": 1, "NCLDQI": 2, "NCLDQR": 3, "NCLDQS": 4, "NCLDQV": 5} - ) - - @ported_method(from_file="common/module/yoethf.F90", from_line=79, to_line=99) - def get_yoethf_parameters(self) -> YoethfParameters: - return self._initialize_parameters(YoethfParameters) - - @ported_method(from_file="common/module/yomcst.F90", from_line=167, to_line=177) - def get_yomcst_parameters(self) -> YomcstParameters: - return self._initialize_parameters(YomcstParameters) - - @ported_method(from_file="common/module/yoecldp.F90", from_line=242, to_line=370) - def get_yrecldp_parameters(self) -> YrecldpParameters: - return self._initialize_parameters( - YrecldpParameters, get_parameter_name=lambda attr_name: "YRECLDP_" + attr_name - ) - - def _get_field_1d(self, ds: h5py.Dataset, name: str) -> np.ndarray: - nlon = self.get_nlon() - nlev = self.get_nlev() - if nlon <= ds.shape[0] <= nlon + 1 or nlev <= ds.shape[0] <= nlev + 1: - return ds[:] - else: - raise RuntimeError( - f"The field `{name}` is expected to have shape ({nlon}(+1),) or " - f"({nlev}(+1),), but has shape {ds.shape}." - ) - - def _get_field_2d(self, ds, name): - nlon = self.get_nlon() - nlev = self.get_nlev() - if nlon <= ds.shape[0] <= nlon + 1 and nlev <= ds.shape[1] <= nlev + 1: - return ds[...] - elif nlon <= ds.shape[1] <= nlon + 1 and nlev <= ds.shape[0] <= nlev + 1: - return np.transpose(ds[...]) - else: - raise RuntimeError( - f"The field `{name}` is expected to have shape " - f"({nlon}(+1), {nlev}(+1)) or ({nlev}(+1), {nlon}(+1)), " - f"but has shape {ds.shape}." - ) - - def _get_field_3d(self, ds, name): - nlon = self.get_nlon() - nlev = self.get_nlev() - - if nlon in ds.shape: - axes = [ds.shape.index(nlon)] - elif nlon + 1 in ds.shape: - axes = [ds.shape.index(nlon + 1)] - else: - raise RuntimeError(f"The field `{name}` has unexpected shape {ds.shape}.") - - if nlev in ds.shape: - axes += [ds.shape.index(nlev)] - elif nlev + 1 in ds.shape: - axes += [ds.shape.index(nlev + 1)] - else: - raise RuntimeError(f"The field `{name}` has unexpected shape {ds.shape}.") - - axes += tuple({0, 1, 2} - set(axes)) - - return np.transpose(ds[...], axes=axes) - - def _initialize_parameters( - self, - parameter_cls: Type[BaseModel], - get_parameter_name: Optional[Callable[[str], str]] = None, - ): - init_dict = {} - for attr_name, metadata in parameter_cls.schema()["properties"].items(): - param_name = ( - get_parameter_name(attr_name) if get_parameter_name is not None else attr_name - ) - param_type = metadata["type"] - if param_type == "boolean": - init_dict[attr_name] = self._get_parameter_b(param_name) - elif param_type == "number": - init_dict[attr_name] = self._get_parameter_f(param_name) - elif param_type == "integer": - init_dict[attr_name] = self._get_parameter_i(param_name) - else: - raise ValueError(f"Invalid parameter type `{param_type}`.") - return parameter_cls(**init_dict) - - def _get_parameter_b(self, name: str) -> bool: - return self.data_types.bool(self.f.get(name, [True])[0]) - - def _get_parameter_f(self, name: str) -> float: - return self.data_types.float(self.f.get(name, [0.0])[0]) - - def _get_parameter_i(self, name: str) -> int: - return self.data_types.int(self.f.get(name, [0])[0]) diff --git a/src/cloudsc_python/src/cloudsc4py/utils/numpyx.py b/src/cloudsc_python/src/cloudsc4py/utils/numpyx.py deleted file mode 100644 index 0e263962..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/numpyx.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -import numpy as np -from typing import TYPE_CHECKING - -try: - import cupy as cp -except ImportError: - cp = np - -if TYPE_CHECKING: - from cloudsc4py.utils.typingx import Storage - - -def to_numpy(storage: Storage) -> np.ndarray: - try: - return storage.get() - except AttributeError: - return storage - - -def assign(lhs: Storage, rhs: Storage) -> None: - if isinstance(lhs, cp.ndarray) and isinstance(rhs, np.ndarray): - lhs[...] = cp.asarray(rhs) - elif isinstance(lhs, np.ndarray) and isinstance(rhs, cp.ndarray): - lhs[...] = rhs.get() - else: - lhs[...] = rhs diff --git a/src/cloudsc_python/src/cloudsc4py/utils/timing.py b/src/cloudsc_python/src/cloudsc4py/utils/timing.py deleted file mode 100644 index 65d3cd9a..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/timing.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -from typing import TYPE_CHECKING - -from sympl._core.time import Timer - -if TYPE_CHECKING: - from typing import Type - - -class timing: - def __init__(self, label: str) -> None: - self.label = label - - def __enter__(self) -> Type[Timer]: - Timer.start(self.label) - return Timer - - def __exit__(self, exc_type, exc_value, exc_tb) -> None: - Timer.stop() diff --git a/src/cloudsc_python/src/cloudsc4py/utils/typingx.py b/src/cloudsc_python/src/cloudsc4py/utils/typingx.py deleted file mode 100644 index a0a2cfdc..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/typingx.py +++ /dev/null @@ -1,28 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -import numpy as np -from typing import Dict, TypeVar, Union - -from sympl import DataArray as SymplDataArray - -try: - import cupy as cp -except ImportError: - cp = np - - -DataArray = SymplDataArray -DataArrayDict = Dict[str, DataArray] -ParameterDict = Dict[str, Union[bool, float, int]] -Storage = Union[np.ndarray, cp.ndarray] -StorageDict = Dict[str, Storage] -Range = TypeVar("Range") diff --git a/src/cloudsc_python/src/cloudsc4py/utils/validation.py b/src/cloudsc_python/src/cloudsc4py/utils/validation.py deleted file mode 100644 index ab8b7aa1..00000000 --- a/src/cloudsc_python/src/cloudsc4py/utils/validation.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- - -# (C) Copyright 2018- ECMWF. -# (C) Copyright 2022- ETH Zurich. - -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - -from __future__ import annotations -import numpy as np -from typing import TYPE_CHECKING - -from cloudsc4py.utils.numpyx import to_numpy - -if TYPE_CHECKING: - from typing import Tuple - - from sympl._core.data_array import DataArray - from sympl._core.typingx import DataArrayDict - - from cloudsc4py.utils.typingx import Storage - - -def validate_storage_2d(src: Storage, trg: Storage) -> bool: - src_np = to_numpy(src) - trg_np = to_numpy(trg) - mi = min(src_np.shape[0], trg_np.shape[0]) - mj = min(src_np.shape[1], trg_np.shape[1]) - return np.allclose(src_np[:mi, :mj], trg_np[:mi, :mj], atol=1e-18, rtol=1e-12) - - -def validate_storage_3d(src: Storage, trg: Storage) -> bool: - src_np = to_numpy(src) - trg_np = to_numpy(trg) - mi = min(src_np.shape[0], trg_np.shape[0]) - mj = min(src_np.shape[1], trg_np.shape[1]) - mk = min(src_np.shape[2], trg_np.shape[2]) - return np.allclose(src_np[:mi, :mj, :mk], trg_np[:mi, :mj, :mk], atol=1e-18, rtol=1e-12) - - -def validate_field(src: DataArray, trg: DataArray) -> bool: - if src.ndim == 2: - return validate_storage_2d(src.data, trg.data) - elif src.ndim == 3: - return validate_storage_3d(src.data, trg.data) - else: - raise ValueError("The field to validate must be either 2-d or 3-d.") - - -def validate(src: DataArrayDict, trg: DataArrayDict) -> Tuple[str]: - return tuple( - name - for name in src - if name in trg and name != "time" and not validate_field(src[name], trg[name]) - ) From 64c3f8af88788fd1e2e79da74e7161ccb31dea01 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 16 May 2023 01:27:20 +0200 Subject: [PATCH 036/174] use FieldSet instead of array of Field, this time in a thread same manner --- .../cloudsc_driver_mod.F90 | 22 +- .../cloudsc_global_atlas_state_mod.F90 | 223 ++++-------------- 2 files changed, 54 insertions(+), 191 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index d173c870..a6d11c27 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -14,7 +14,7 @@ MODULE CLOUDSC_DRIVER_MOD USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM USE EC_PMON_MOD, ONLY: EC_PMON - USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW, CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS + USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW USE ATLAS_MODULE USE, INTRINSIC :: ISO_C_BINDING @@ -33,7 +33,6 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) INTEGER(KIND=JPIM), INTENT(IN) :: NUMOMP, NGPTOT, NGPTOTG, KFLDX REAL(KIND=JPRB), INTENT(IN) :: PTSPHY ! Physics timestep - TYPE(CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS) :: SFIELDS TYPE(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW) :: FBLOCK TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD @@ -68,9 +67,6 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) ! Global timer for the parallel region CALL TIMER%START(NUMOMP) - - CALL SFIELDS%SETUP(FSET) - !$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power,FBLOCK) & !$omp& num_threads(NUMOMP) @@ -84,26 +80,12 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) ICEND=MIN(NPROMA,NGPTOT-JKGLO+1) ! get block views - call FBLOCK%GET_BLOCK(SFIELDS, IBL) - CONTINUE + call FBLOCK%GET_BLOCK(FSET, IBL) !-- These were uninitialized : meaningful only when we compare error differences FBLOCK%PCOVPTOT(:,:) = 0.0_JPRB FBLOCK%TENDENCY_LOC%cld(:,:,NCLV) = 0.0_JPRB - !--- a future plan to replace the call to CLOUDSC ------ - ! - ! type( block_state_t ) - ! real(c_double), pointer :: PT(:,:) - ! type(state_type) :: tendency_LOC - ! type(state_type) :: tendency_TMP - ! type(state_type) :: tendency_CML - ! end type - ! call extract_block( FSET, IBL, config, block_state ) - ! call FSET%FIELD("PT")%BLOCK_DATA(IBL,PT,CONFIG) - ! call FSET%FIELD("PQ")%BLOCK_DATA(IBL,PQ,CONFIG) - ! call cloudsc_atlas ( FSET, IBL, config ) - CALL CLOUDSC & & ( 1, ICEND, NPROMA, NLEV,& & PTSPHY,& diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index ba18ac74..0bef9bc0 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -112,67 +112,6 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD PROCEDURE :: GET_BLOCK => CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK END TYPE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW - TYPE CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS - !TYPE(atlas_field), allocatlabe :: fields(:) - - ! Input field variables and tendencies - TYPE(atlas_field) :: fPLCRIT_AER - TYPE(atlas_field) :: fPICRIT_AER - TYPE(atlas_field) :: fPRE_ICE - TYPE(atlas_field) :: fPCCN ! liquid cloud condensation nuclei - TYPE(atlas_field) :: fPNICE ! ice number concentration (cf. CCN) - TYPE(atlas_field) :: fPT ! T at start of callpar - TYPE(atlas_field) :: fPQ ! Q at start of callpar - TYPE(atlas_field) :: fPVFA ! CC from VDF scheme - TYPE(atlas_field) :: fPVFL ! Liq from VDF scheme - TYPE(atlas_field) :: fPVFI ! Ice from VDF scheme - TYPE(atlas_field) :: fPDYNA ! CC from Dynamics - TYPE(atlas_field) :: fPDYNL ! Liq from Dynamics - TYPE(atlas_field) :: fPDYNI ! Liq from Dynamics - TYPE(atlas_field) :: fPHRSW ! Short-wave heating rate - TYPE(atlas_field) :: fPHRLW ! Long-wave heating rate - TYPE(atlas_field) :: fPVERVEL ! Vertical velocity - TYPE(atlas_field) :: fPAP ! Pressure on full levels - TYPE(atlas_field) :: fPLU ! Conv. condensate - TYPE(atlas_field) :: fPLUDE ! Conv. detrained water - TYPE(atlas_field) :: fPSNDE ! Conv. detrained snow - TYPE(atlas_field) :: fPMFU ! Conv. mass flux up - TYPE(atlas_field) :: fPMFD ! Conv. mass flux down - TYPE(atlas_field) :: fPA ! Original Cloud fraction (t) - TYPE(atlas_field) :: fPSUPSAT - - TYPE(atlas_field) :: fPLSM ! Land fraction (0-1) - TYPE(atlas_field) :: fLDCUM ! Convection active - TYPE(atlas_field) :: fKTYPE ! Convection type 0,1,2 - TYPE(atlas_field) :: fPAPH ! Pressure on half levels - TYPE(atlas_field) :: fPEXTRA ! extra fields - TYPE(atlas_field) :: fPCLV - - TYPE(atlas_field) :: fTENDENCY_CML ! cumulative tendency used for final output - TYPE(atlas_field) :: fTENDENCY_TMP ! cumulative tendency used as input - TYPE(atlas_field) :: fTENDENCY_LOC ! local tendency from cloud scheme - - ! Output fields used for validation - TYPE(atlas_field) :: fPFSQLF ! Flux of liquid - TYPE(atlas_field) :: fPFSQIF ! Flux of ice - TYPE(atlas_field) :: fPFCQLNG ! -ve corr for liq - TYPE(atlas_field) :: fPFCQNNG ! -ve corr for ice - TYPE(atlas_field) :: fPFSQRF ! Flux diagnostics - TYPE(atlas_field) :: fPFSQSF ! for DDH, generic - TYPE(atlas_field) :: fPFCQRNG ! rain - TYPE(atlas_field) :: fPFCQSNG ! snow - TYPE(atlas_field) :: fPFSQLTUR ! liquid flux due to VDF - TYPE(atlas_field) :: fPFSQITUR ! ice flux due to VDF - TYPE(atlas_field) :: fPFPLSL ! liq+rain sedim flux - TYPE(atlas_field) :: fPFPLSN ! ice+snow sedim flux - TYPE(atlas_field) :: fPFHPSL ! Enthalpy flux for liq - TYPE(atlas_field) :: fPFHPSN ! Enthalpy flux for ice - TYPE(atlas_field) :: fPCOVPTOT ! Precip fraction - TYPE(atlas_field) :: fPRAINFRAC_TOPRFZ - CONTAINS - PROCEDURE :: SETUP => CLOUDSC_GLOBAL_ATLAS_SETUP_BLOCK - END TYPE CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS - TYPE CLOUDSC_GLOBAL_ATLAS_STATE ! Memory state containing raw fields annd tendencies for CLOUDSC dwarf ! @@ -192,134 +131,76 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD CONTAINS - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_SETUP_BLOCK(SELF, FSET) - CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS), INTENT(INOUT) :: SELF - TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET - - SELF%fPLCRIT_AER = FSET%FIELD("PLCRIT_AER") - SELF%fPICRIT_AER = FSET%FIELD("PICRIT_AER") - SELF%fPRE_ICE = FSET%FIELD("PRE_ICE") - SELF%fPCCN = FSET%FIELD("PCCN") - SELF%fPNICE = FSET%FIELD("PNICE") - SELF%fPT = FSET%FIELD("PT") - SELF%fPQ = FSET%FIELD("PQ") - SELF%fPVFA = FSET%FIELD("PVFA") - SELF%fPVFL = FSET%FIELD("PVFL") - SELF%fPVFI = FSET%FIELD("PVFI") - SELF%fPDYNA = FSET%FIELD("PDYNA") - SELF%fPDYNL = FSET%FIELD("PDYNL") - SELF%fPDYNI = FSET%FIELD("PDYNI") - SELF%fPHRSW = FSET%FIELD("PHRSW") - SELF%fPHRLW = FSET%FIELD("PHRLW") - SELF%fPVERVEL = FSET%FIELD("PVERVEL") - SELF%fPAP = FSET%FIELD("PAP") - SELF%fPLU = FSET%FIELD("PLU") - SELF%fPLUDE = FSET%FIELD("PLUDE") - SELF%fPSNDE = FSET%FIELD("PSNDE") - SELF%fPMFU = FSET%FIELD("PMFU") - SELF%fPMFD = FSET%FIELD("PMFD") - SELF%fPA = FSET%FIELD("PA") - SELF%fPSUPSAT = FSET%FIELD("PSUPSAT") - SELF%fPLSM = FSET%FIELD("PLSM") - SELF%fLDCUM = FSET%FIELD("LDCUM") - SELF%fKTYPE = FSET%FIELD("KTYPE") - SELF%fPAPH = FSET%FIELD("PAPH") - SELF%fPEXTRA = FSET%FIELD("PEXTRA") - SELF%fPCLV = FSET%FIELD("PCLV") - - SELF%fTENDENCY_CML = FSET%FIELD('TENDENCY_CML') - SELF%fTENDENCY_TMP = FSET%FIELD('TENDENCY_TMP') - SELF%fTENDENCY_LOC = FSET%FIELD('TENDENCY_LOC') - - SELF%fPFSQLF = FSET%FIELD("PFSQLF") - SELF%fPFSQIF = FSET%FIELD("PFSQIF") - SELF%fPFCQLNG = FSET%FIELD("PFCQLNG") - SELF%fPFCQNNG = FSET%FIELD("PFCQNNG") - SELF%fPFSQRF = FSET%FIELD("PFSQRF") - SELF%fPFSQSF = FSET%FIELD("PFSQSF") - SELF%fPFCQRNG = FSET%FIELD("PFCQRNG") - SELF%fPFCQSNG = FSET%FIELD("PFCQSNG") - SELF%fPFSQLTUR = FSET%FIELD("PFSQLTUR") - SELF%fPFSQITUR = FSET%FIELD("PFSQITUR") - SELF%fPFPLSL = FSET%FIELD("PFPLSL") - SELF%fPFPLSN = FSET%FIELD("PFPLSN") - SELF%fPFHPSL = FSET%FIELD("PFHPSL") - SELF%fPFHPSN = FSET%FIELD("PFHPSN") - SELF%fPCOVPTOT = FSET%FIELD("PCOVPTOT") - SELF%fPRAINFRAC_TOPRFZ = FSET%FIELD("PRAINFRAC_TOPRFZ") - END SUBROUTINE - - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FIELDS, IBLK) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW), INTENT(INOUT) :: SELF - CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_FIELDS), INTENT(INOUT) :: FIELDS + CLASS(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER, INTENT(IN) :: IBLK REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) - ! NOTE the last six input variables need special treatment - different types - CALL FIELDS%fPLCRIT_AER%DATA(SELF%PLCRIT_AER, IBLK) - CALL FIELDS%fPICRIT_AER%DATA(SELF%PICRIT_AER, IBLK) - CALL FIELDS%fPRE_ICE%DATA(SELF%PRE_ICE, IBLK) - CALL FIELDS%fPCCN%DATA(SELF%PCCN, IBLK) - CALL FIELDS%fPNICE%DATA(SELF%PNICE, IBLK) - CALL FIELDS%fPT%DATA(SELF%PT, IBLK) - CALL FIELDS%fPQ%DATA(SELF%PQ, IBLK) - CALL FIELDS%fPVFA%DATA(SELF%PVFA, IBLK) - CALL FIELDS%fPVFL%DATA(SELF%PVFL, IBLK) - CALL FIELDS%fPVFI%DATA(SELF%PVFI, IBLK) - CALL FIELDS%fPDYNA%DATA(SELF%PDYNA, IBLK) - CALL FIELDS%fPDYNL%DATA(SELF%PDYNL, IBLK) - CALL FIELDS%fPDYNI%DATA(SELF%PDYNI, IBLK) - CALL FIELDS%fPHRSW%DATA(SELF%PHRSW, IBLK) - CALL FIELDS%fPHRLW%DATA(SELF%PHRLW, IBLK) - CALL FIELDS%fPVERVEL%DATA(SELF%PVERVEL, IBLK) - CALL FIELDS%fPAP%DATA(SELF%PAP, IBLK) - CALL FIELDS%fPLU%DATA(SELF%PLU, IBLK) - CALL FIELDS%fPLUDE%DATA(SELF%PLUDE, IBLK) - CALL FIELDS%fPSNDE%DATA(SELF%PSNDE, IBLK) - CALL FIELDS%fPMFU%DATA(SELF%PMFU, IBLK) - CALL FIELDS%fPMFD%DATA(SELF%PMFD, IBLK) - CALL FIELDS%fPA%DATA(SELF%PA, IBLK) - CALL FIELDS%fPSUPSAT%DATA(SELF%PSUPSAT, IBLK) - CALL FIELDS%fPLSM%DATA(SELF%PLSM, IBLK) - CALL FIELDS%fLDCUM%DATA(SELF%LDCUM, IBLK) - CALL FIELDS%fKTYPE%DATA(SELF%KTYPE, IBLK) - CALL FIELDS%fPAPH%DATA(SELF%PAPH, IBLK) - CALL FIELDS%fPEXTRA%DATA(SELF%PEXTRA, IBLK) - CALL FIELDS%fPCLV%DATA(SELF%PCLV, IBLK) - - CALL FIELDS%fTENDENCY_CML%DATA(TMP3D, IBLK) + CALL FSET%DATA("PLCRIT_AER", SELF%PLCRIT_AER, IBLK) + CALL FSET%DATA("PICRIT_AER", SELF%PICRIT_AER, IBLK) + CALL FSET%DATA("PRE_ICE", SELF%PRE_ICE, IBLK) + CALL FSET%DATA("PCCN", SELF%PCCN, IBLK) + CALL FSET%DATA("PNICE", SELF%PNICE, IBLK) + CALL FSET%DATA("PT", SELF%PT, IBLK) + CALL FSET%DATA("PQ", SELF%PQ, IBLK) + CALL FSET%DATA("PVFA", SELF%PVFA, IBLK) + CALL FSET%DATA("PVFL", SELF%PVFL, IBLK) + CALL FSET%DATA("PVFI", SELF%PVFI, IBLK) + CALL FSET%DATA("PDYNA", SELF%PDYNA, IBLK) + CALL FSET%DATA("PDYNL", SELF%PDYNL, IBLK) + CALL FSET%DATA("PDYNI", SELF%PDYNI, IBLK) + CALL FSET%DATA("PHRSW", SELF%PHRSW, IBLK) + CALL FSET%DATA("PHRLW", SELF%PHRLW, IBLK) + CALL FSET%DATA("PVERVEL", SELF%PVERVEL, IBLK) + CALL FSET%DATA("PAP", SELF%PAP, IBLK) + CALL FSET%DATA("PLU", SELF%PLU, IBLK) + CALL FSET%DATA("PLUDE", SELF%PLUDE, IBLK) + CALL FSET%DATA("PSNDE", SELF%PSNDE, IBLK) + CALL FSET%DATA("PMFU", SELF%PMFU, IBLK) + CALL FSET%DATA("PMFD", SELF%PMFD, IBLK) + CALL FSET%DATA("PA", SELF%PA, IBLK) + CALL FSET%DATA("PSUPSAT", SELF%PSUPSAT, IBLK) + CALL FSET%DATA("PLSM", SELF%PLSM, IBLK) + CALL FSET%DATA("LDCUM", SELF%LDCUM, IBLK) + CALL FSET%DATA("KTYPE", SELF%KTYPE, IBLK) + CALL FSET%DATA("PAPH", SELF%PAPH, IBLK) + CALL FSET%DATA("PEXTRA", SELF%PEXTRA, IBLK) + CALL FSET%DATA("PCLV", SELF%PCLV, IBLK) + + CALL FSET%DATA("TENDENCY_CML", TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - CALL FIELDS%fTENDENCY_TMP%DATA(TMP3D, IBLK) + CALL FSET%DATA("TENDENCY_TMP", TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - CALL FIELDS%fTENDENCY_LOC%DATA(TMP3D, IBLK) + CALL FSET%DATA("TENDENCY_LOC", TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - CALL FIELDS%fPFSQLF%DATA(SELF%PFSQLF, IBLK) - CALL FIELDS%fPFSQIF%DATA(SELF%PFSQIF, IBLK) - CALL FIELDS%fPFCQLNG%DATA(SELF%PFCQLNG, IBLK) - CALL FIELDS%fPFCQNNG%DATA(SELF%PFCQNNG, IBLK) - CALL FIELDS%fPFSQRF%DATA(SELF%PFSQRF, IBLK) - CALL FIELDS%fPFSQSF%DATA(SELF%PFSQSF, IBLK) - CALL FIELDS%fPFCQRNG%DATA(SELF%PFCQRNG, IBLK) - CALL FIELDS%fPFCQSNG%DATA(SELF%PFCQSNG, IBLK) - CALL FIELDS%fPFSQLTUR%DATA(SELF%PFSQLTUR, IBLK) - CALL FIELDS%fPFSQITUR%DATA(SELF%PFSQITUR, IBLK) - CALL FIELDS%fPFPLSL%DATA(SELF%PFPLSL, IBLK) - CALL FIELDS%fPFPLSN%DATA(SELF%PFPLSN, IBLK) - CALL FIELDS%fPFHPSL%DATA(SELF%PFHPSL, IBLK) - CALL FIELDS%fPFHPSN%DATA(SELF%PFHPSN, IBLK) - CALL FIELDS%fPCOVPTOT%DATA(SELF%PCOVPTOT, IBLK) - CALL FIELDS%fPRAINFRAC_TOPRFZ%DATA(SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FSET%DATA("PFSQLF", SELF%PFSQLF, IBLK) + CALL FSET%DATA("PFSQIF", SELF%PFSQIF, IBLK) + CALL FSET%DATA("PFCQLNG", SELF%PFCQLNG, IBLK) + CALL FSET%DATA("PFCQNNG", SELF%PFCQNNG, IBLK) + CALL FSET%DATA("PFSQRF", SELF%PFSQRF, IBLK) + CALL FSET%DATA("PFSQSF", SELF%PFSQSF, IBLK) + CALL FSET%DATA("PFCQRNG", SELF%PFCQRNG, IBLK) + CALL FSET%DATA("PFCQSNG", SELF%PFCQSNG, IBLK) + CALL FSET%DATA("PFSQLTUR", SELF%PFSQLTUR, IBLK) + CALL FSET%DATA("PFSQITUR", SELF%PFSQITUR, IBLK) + CALL FSET%DATA("PFPLSL", SELF%PFPLSL, IBLK) + CALL FSET%DATA("PFPLSN", SELF%PFPLSN, IBLK) + CALL FSET%DATA("PFHPSL", SELF%PFHPSL, IBLK) + CALL FSET%DATA("PFHPSN", SELF%PFHPSN, IBLK) + CALL FSET%DATA("PCOVPTOT", SELF%PCOVPTOT, IBLK) + CALL FSET%DATA("PRAINFRAC_TOPRFZ", SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) From f9a1657e2a48ab0891dd9da24a8948d89bc55d2f Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Sun, 21 May 2023 23:52:02 +0200 Subject: [PATCH 037/174] switch to index to acces a field from a FieldSet rather than using string --- .../cloudsc_global_atlas_state_mod.F90 | 103 +++++++++--------- 1 file changed, 50 insertions(+), 53 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 0bef9bc0..ba09384f 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -34,9 +34,6 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(C_DOUBLE), POINTER :: PTR(:,:) END TYPE - !INTEGER, PARAMETER PLCRIT_AER = 1 - !INTEGER, PARAMETER PLCRIT_AER = 2 - !IN_VAR_NAMES(PLCRIT_AER) CHARACTER(LEN=10), PARAMETER, DIMENSION(30) :: IN_VAR_NAMES = (/ & "PLCRIT_AER", "PICRIT_AER", "PRE_ICE ", "PCCN ", "PNICE ", "PT ", "PQ ", & "PVFA ", "PVFL ", "PVFI ", "PDYNA ", "PDYNL ", "PDYNI ", "PHRSW ", & @@ -138,69 +135,69 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) - CALL FSET%DATA("PLCRIT_AER", SELF%PLCRIT_AER, IBLK) - CALL FSET%DATA("PICRIT_AER", SELF%PICRIT_AER, IBLK) - CALL FSET%DATA("PRE_ICE", SELF%PRE_ICE, IBLK) - CALL FSET%DATA("PCCN", SELF%PCCN, IBLK) - CALL FSET%DATA("PNICE", SELF%PNICE, IBLK) - CALL FSET%DATA("PT", SELF%PT, IBLK) - CALL FSET%DATA("PQ", SELF%PQ, IBLK) - CALL FSET%DATA("PVFA", SELF%PVFA, IBLK) - CALL FSET%DATA("PVFL", SELF%PVFL, IBLK) - CALL FSET%DATA("PVFI", SELF%PVFI, IBLK) - CALL FSET%DATA("PDYNA", SELF%PDYNA, IBLK) - CALL FSET%DATA("PDYNL", SELF%PDYNL, IBLK) - CALL FSET%DATA("PDYNI", SELF%PDYNI, IBLK) - CALL FSET%DATA("PHRSW", SELF%PHRSW, IBLK) - CALL FSET%DATA("PHRLW", SELF%PHRLW, IBLK) - CALL FSET%DATA("PVERVEL", SELF%PVERVEL, IBLK) - CALL FSET%DATA("PAP", SELF%PAP, IBLK) - CALL FSET%DATA("PLU", SELF%PLU, IBLK) - CALL FSET%DATA("PLUDE", SELF%PLUDE, IBLK) - CALL FSET%DATA("PSNDE", SELF%PSNDE, IBLK) - CALL FSET%DATA("PMFU", SELF%PMFU, IBLK) - CALL FSET%DATA("PMFD", SELF%PMFD, IBLK) - CALL FSET%DATA("PA", SELF%PA, IBLK) - CALL FSET%DATA("PSUPSAT", SELF%PSUPSAT, IBLK) - CALL FSET%DATA("PLSM", SELF%PLSM, IBLK) - CALL FSET%DATA("LDCUM", SELF%LDCUM, IBLK) - CALL FSET%DATA("KTYPE", SELF%KTYPE, IBLK) - CALL FSET%DATA("PAPH", SELF%PAPH, IBLK) - CALL FSET%DATA("PEXTRA", SELF%PEXTRA, IBLK) - CALL FSET%DATA("PCLV", SELF%PCLV, IBLK) - - CALL FSET%DATA("TENDENCY_CML", TMP3D, IBLK) + CALL FSET%DATA(1, SELF%PLCRIT_AER, IBLK) + CALL FSET%DATA(2, SELF%PICRIT_AER, IBLK) + CALL FSET%DATA(3, SELF%PRE_ICE, IBLK) + CALL FSET%DATA(4, SELF%PCCN, IBLK) + CALL FSET%DATA(5, SELF%PNICE, IBLK) + CALL FSET%DATA(6, SELF%PT, IBLK) + CALL FSET%DATA(7, SELF%PQ, IBLK) + CALL FSET%DATA(8, SELF%PVFA, IBLK) + CALL FSET%DATA(9, SELF%PVFL, IBLK) + CALL FSET%DATA(10, SELF%PVFI, IBLK) + CALL FSET%DATA(11, SELF%PDYNA, IBLK) + CALL FSET%DATA(12, SELF%PDYNL, IBLK) + CALL FSET%DATA(13, SELF%PDYNI, IBLK) + CALL FSET%DATA(14, SELF%PHRSW, IBLK) + CALL FSET%DATA(15, SELF%PHRLW, IBLK) + CALL FSET%DATA(16, SELF%PVERVEL, IBLK) + CALL FSET%DATA(17, SELF%PAP, IBLK) + CALL FSET%DATA(18, SELF%PLU, IBLK) + CALL FSET%DATA(19, SELF%PLUDE, IBLK) + CALL FSET%DATA(20, SELF%PSNDE, IBLK) + CALL FSET%DATA(21, SELF%PMFU, IBLK) + CALL FSET%DATA(22, SELF%PMFD, IBLK) + CALL FSET%DATA(23, SELF%PA, IBLK) + CALL FSET%DATA(24, SELF%PSUPSAT, IBLK) + CALL FSET%DATA(25, SELF%PLSM, IBLK) + CALL FSET%DATA(26, SELF%LDCUM, IBLK) + CALL FSET%DATA(27, SELF%KTYPE, IBLK) + CALL FSET%DATA(28, SELF%PAPH, IBLK) + CALL FSET%DATA(29, SELF%PEXTRA, IBLK) + CALL FSET%DATA(30, SELF%PCLV, IBLK) + + CALL FSET%DATA(31, TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - CALL FSET%DATA("TENDENCY_TMP", TMP3D, IBLK) + CALL FSET%DATA(32, TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - CALL FSET%DATA("TENDENCY_LOC", TMP3D, IBLK) + CALL FSET%DATA(33, TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - CALL FSET%DATA("PFSQLF", SELF%PFSQLF, IBLK) - CALL FSET%DATA("PFSQIF", SELF%PFSQIF, IBLK) - CALL FSET%DATA("PFCQLNG", SELF%PFCQLNG, IBLK) - CALL FSET%DATA("PFCQNNG", SELF%PFCQNNG, IBLK) - CALL FSET%DATA("PFSQRF", SELF%PFSQRF, IBLK) - CALL FSET%DATA("PFSQSF", SELF%PFSQSF, IBLK) - CALL FSET%DATA("PFCQRNG", SELF%PFCQRNG, IBLK) - CALL FSET%DATA("PFCQSNG", SELF%PFCQSNG, IBLK) - CALL FSET%DATA("PFSQLTUR", SELF%PFSQLTUR, IBLK) - CALL FSET%DATA("PFSQITUR", SELF%PFSQITUR, IBLK) - CALL FSET%DATA("PFPLSL", SELF%PFPLSL, IBLK) - CALL FSET%DATA("PFPLSN", SELF%PFPLSN, IBLK) - CALL FSET%DATA("PFHPSL", SELF%PFHPSL, IBLK) - CALL FSET%DATA("PFHPSN", SELF%PFHPSN, IBLK) - CALL FSET%DATA("PCOVPTOT", SELF%PCOVPTOT, IBLK) - CALL FSET%DATA("PRAINFRAC_TOPRFZ", SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FSET%DATA(34, SELF%PFSQLF, IBLK) + CALL FSET%DATA(35, SELF%PFSQIF, IBLK) + CALL FSET%DATA(36, SELF%PFCQLNG, IBLK) + CALL FSET%DATA(37, SELF%PFCQNNG, IBLK) + CALL FSET%DATA(38, SELF%PFSQRF, IBLK) + CALL FSET%DATA(39, SELF%PFSQSF, IBLK) + CALL FSET%DATA(40, SELF%PFCQRNG, IBLK) + CALL FSET%DATA(41, SELF%PFCQSNG, IBLK) + CALL FSET%DATA(42, SELF%PFSQLTUR, IBLK) + CALL FSET%DATA(43, SELF%PFSQITUR, IBLK) + CALL FSET%DATA(44, SELF%PFPLSL, IBLK) + CALL FSET%DATA(45, SELF%PFPLSN, IBLK) + CALL FSET%DATA(46, SELF%PFHPSL, IBLK) + CALL FSET%DATA(47, SELF%PFHPSN, IBLK) + CALL FSET%DATA(48, SELF%PCOVPTOT, IBLK) + CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) From e5c9200852d099a5e32c6de7207aac667fe762ce Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 20 Mar 2023 22:03:27 +0000 Subject: [PATCH 038/174] Add a loki-idem-stack target --- src/cloudsc_loki/CMakeLists.txt | 109 ++++++++++++++++++++++++++++++++ src/cloudsc_loki/stack_mod.F90 | 21 ++++++ 2 files changed, 130 insertions(+) create mode 100644 src/cloudsc_loki/stack_mod.F90 diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index a8b52177..ac935235 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -136,6 +136,70 @@ if( HAVE_CLOUDSC_LOKI ) CONDITION HAVE_OMP AND HAVE_MPI ) + ############################################################ + ## Idempotence mode with pool-allocator for temporaries: ## + ## * Internal "do-nothing" mode for Loki debug ## + ############################################################ + + cloudsc_xmod( loki-idem-stack ) + + loki_transform_convert( + MODE idem-stack FRONTEND ${LOKI_FRONTEND} CPP + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + PATH ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 + INCLUDES ${COMMON_INCLUDE} + XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} + OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem-stack + OUTPUT loki-idem-stack/cloudsc.idem_stack.F90 loki-idem-stack/cloudsc_driver_loki_mod.idem_stack.F90 + DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc-loki-idem-stack + SOURCES + dwarf_cloudsc.F90 + stack_mod.F90 + loki-idem-stack/cloudsc_driver_loki_mod.idem_stack.F90 + loki-idem-stack/cloudsc.idem_stack.F90 + LIBS + cloudsc-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-loki-idem-stack-serial + COMMAND bin/dwarf-cloudsc-loki-idem-stack + ARGS 1 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-loki-idem-stack-omp + COMMAND bin/dwarf-cloudsc-loki-idem-stack + ARGS 4 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 4 + CONDITION HAVE_OMP + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-loki-idem-stack-mpi + COMMAND bin/dwarf-cloudsc-loki-idem-stack + ARGS 1 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + MPI 2 + OMP 1 + CONDITION HAVE_MPI + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-loki-idem-stack-mpi-omp + COMMAND bin/dwarf-cloudsc-loki-idem-stack + ARGS 4 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + MPI 2 + OMP 4 + CONDITION HAVE_OMP AND HAVE_MPI + ) + #################################################### ## SCA mode (Single Column Abstraction): ## @@ -373,6 +437,51 @@ if( HAVE_CLOUDSC_LOKI ) ENVIRONMENT "NVCOMPILER_ACC_CUDA_HEAPSIZE=64M" ) + ###################################################### + ## "Single Column Coalesced" (SCC) mode with stack ## + ## * Removes horizontal vector loops ## + ## * Invokes compute kernel as `!$acc vector` ## + ## * Allocates temporaries using pool allocator ## + ###################################################### + + cloudsc_xmod( loki-scc-stack ) + + loki_transform_convert( + MODE scc-stack FRONTEND ${LOKI_FRONTEND} CPP + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + PATH ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 + INCLUDES ${COMMON_INCLUDE} + DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC} + DATA_OFFLOAD REMOVE_OPENMP + XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} + OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-stack + OUTPUT + loki-scc-stack/cloudsc.scc_stack.F90 + loki-scc-stack/cloudsc_driver_loki_mod.scc_stack.F90 + DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + ) + + ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-stack + SOURCES + dwarf_cloudsc.F90 + stack_mod.F90 + loki-scc-stack/cloudsc_driver_loki_mod.scc_stack.F90 + loki-scc-stack/cloudsc.scc_stack.F90 + LIBS + cloudsc-common-lib + DEFINITIONS ${CLOUDSC_DEFINITIONS} + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-loki-scc-stack-serial + COMMAND bin/dwarf-cloudsc-loki-scc-stack + ARGS 1 1280 128 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + + #################################################### ## SCC-hoist mode ## ## * SCC with vector loop hoisted ## diff --git a/src/cloudsc_loki/stack_mod.F90 b/src/cloudsc_loki/stack_mod.F90 new file mode 100644 index 00000000..8d076801 --- /dev/null +++ b/src/cloudsc_loki/stack_mod.F90 @@ -0,0 +1,21 @@ +! (C) Copyright 1988- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +MODULE STACK_MOD + +IMPLICIT NONE + +TYPE STACK + INTEGER*8 :: L, U +END TYPE + +PRIVATE +PUBLIC :: STACK + +END MODULE From f1a0c0d8bcfa1312d32c5baa7350eac7e7628aec Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 14 Apr 2023 10:49:53 +0100 Subject: [PATCH 039/174] Add stack targets to Github actions --- .github/scripts/verify-targets.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 0cdc6b8e..7040502d 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -37,6 +37,7 @@ if [[ "$loki_flag" == "--with-loki" ]] then targets+=(dwarf-cloudsc-loki-idem dwarf-cloudsc-loki-sca) targets+=(dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist) + targets+=(dwarf-cloudsc-loki-idem-stack dwarf-cloudsc-loki-scc-stack) if [[ "$prec_flag" != "--single-precision" ]] then targets+=(dwarf-cloudsc-loki-c) From e19bc3b14b5a22a8f41af022478e76d07b01278b Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 14 Apr 2023 16:31:03 +0100 Subject: [PATCH 040/174] Add Cray Pointer flags to compile options --- src/cloudsc_loki/CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index ac935235..aa681c14 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -166,6 +166,12 @@ if( HAVE_CLOUDSC_LOKI ) DEFINITIONS ${CLOUDSC_DEFINITIONS} ) + if( CMAKE_Fortran_COMPILER_ID MATCHES "GNU") + target_compile_options( dwarf-cloudsc-loki-idem-stack PRIVATE "-fcray-pointer" ) + elseif( CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC" OR CMAKE_Fortran_COMPILER_ID MATCHES "PGI" ) + target_compile_options( dwarf-cloudsc-loki-idem-stack PRIVATE "-Mcray=pointer" ) + endif() + ecbuild_add_test( TARGET dwarf-cloudsc-loki-idem-stack-serial COMMAND bin/dwarf-cloudsc-loki-idem-stack @@ -473,6 +479,12 @@ if( HAVE_CLOUDSC_LOKI ) DEFINITIONS ${CLOUDSC_DEFINITIONS} ) + if( CMAKE_Fortran_COMPILER_ID MATCHES "GNU") + target_compile_options( dwarf-cloudsc-loki-scc-stack PRIVATE "-fcray-pointer" ) + elseif( CMAKE_Fortran_COMPILER_ID MATCHES "NVHPC" OR CMAKE_Fortran_COMPILER_ID MATCHES "PGI" ) + target_compile_options( dwarf-cloudsc-loki-scc-stack PRIVATE "-Mcray=pointer" ) + endif() + ecbuild_add_test( TARGET dwarf-cloudsc-loki-scc-stack-serial COMMAND bin/dwarf-cloudsc-loki-scc-stack From 624b3b976c46856fcc9d9c42300f58b77e81a9d3 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 12 Jun 2023 17:06:52 +0100 Subject: [PATCH 041/174] Enable gvmode on NVHPC --- arch/toolchains/ecmwf-hpc2020-nvhpc.cmake | 4 +++- src/cloudsc_loki/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake b/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake index f191e9a7..6ea418b4 100644 --- a/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake +++ b/arch/toolchains/ecmwf-hpc2020-nvhpc.cmake @@ -31,9 +31,11 @@ set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") # OpenAcc FLAGS #################################################################### +# Importantly, enable `gvmode` to remove the limit of 32 vector threads +# per thread block # NB: We have to add `-mp` again to avoid undefined symbols during linking # (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) +set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath,gvmode" CACHE STRING "" ) # Enable this to get more detailed compiler output # set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index aa681c14..90a5abf0 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -440,7 +440,7 @@ if( HAVE_CLOUDSC_LOKI ) ARGS 1 1280 128 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 - ENVIRONMENT "NVCOMPILER_ACC_CUDA_HEAPSIZE=64M" + ENVIRONMENT "NVCOMPILER_ACC_CUDA_HEAPSIZE=128M" ) ###################################################### From 475b43a763c11b1f71721deca463663df9920b53 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 12 Jun 2023 17:07:24 +0100 Subject: [PATCH 042/174] Add nvidia/22.11 arch files for hpc2020 --- arch/ecmwf/hpc2020/nvhpc/22.11/env.sh | 52 +++++++++++++++++++ .../ecmwf/hpc2020/nvhpc/22.11/toolchain.cmake | 1 + 2 files changed, 53 insertions(+) create mode 100644 arch/ecmwf/hpc2020/nvhpc/22.11/env.sh create mode 120000 arch/ecmwf/hpc2020/nvhpc/22.11/toolchain.cmake diff --git a/arch/ecmwf/hpc2020/nvhpc/22.11/env.sh b/arch/ecmwf/hpc2020/nvhpc/22.11/env.sh new file mode 100644 index 00000000..c856f467 --- /dev/null +++ b/arch/ecmwf/hpc2020/nvhpc/22.11/env.sh @@ -0,0 +1,52 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module_unload nvidia +module_unload intel-mpi +module_unload openmpi +module_unload hpcx-openmpi +module_unload boost +module_unload hdf5 +module_unload cmake +module_unload python3 +module_unload java + +# Load modules +module_load prgenv/nvidia +module_load nvidia/22.11 +module_load hpcx-openmpi/2.10.0 +# module_load boost/1.71.0 +module_load hdf5/1.10.6 +module_load cmake/3.25.2 +module_load python3/3.10.10-01 +module_load java/11.0.6 + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/ecmwf/hpc2020/nvhpc/22.11/toolchain.cmake b/arch/ecmwf/hpc2020/nvhpc/22.11/toolchain.cmake new file mode 120000 index 00000000..7b14d221 --- /dev/null +++ b/arch/ecmwf/hpc2020/nvhpc/22.11/toolchain.cmake @@ -0,0 +1 @@ +../../../../toolchains/ecmwf-hpc2020-nvhpc.cmake \ No newline at end of file From a08120c44f7d120ccc67ddf88b72de2c07ccf218 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 10 Jul 2023 17:09:40 +0100 Subject: [PATCH 043/174] Change -Mcuda to -cuda and link common lib using -cuda --- src/cloudsc_gpu/CMakeLists.txt | 8 +++--- src/cloudsc_loki/CMakeLists.txt | 14 +++++------ src/common/CMakeLists.txt | 6 ++--- src/common/module/cloudsc_field_state_mod.F90 | 12 ++++----- src/common/module/yomphyder.F90 | 25 +++++++++---------- 5 files changed, 31 insertions(+), 34 deletions(-) diff --git a/src/cloudsc_gpu/CMakeLists.txt b/src/cloudsc_gpu/CMakeLists.txt index 33db51dd..3df08bc9 100644 --- a/src/cloudsc_gpu/CMakeLists.txt +++ b/src/cloudsc_gpu/CMakeLists.txt @@ -180,10 +180,10 @@ endif() if( HAVE_CLOUDSC_GPU_SCC_CUF ) - # Compile CUDA fortran files with -MCuda. + # Compile CUDA fortran files with -cuda. cloudsc_add_compile_options( SOURCES cloudsc_gpu_scc_cuf_mod.F90 cloudsc_driver_gpu_scc_cuf_mod.F90 - FLAGS "-Mcuda=maxregcount:128") + FLAGS "-cuda -gpu=maxregcount:128") ecbuild_add_executable( TARGET dwarf-cloudsc-gpu-scc-cuf @@ -208,10 +208,10 @@ endif() if ( HAVE_CLOUDSC_GPU_SCC_CUF_K_CACHING ) # NEW CUF with k-caching!!!! - # Compile CUDA fortran files with -MCuda. + # Compile CUDA fortran files with -cuda. cloudsc_add_compile_options( SOURCES cloudsc_gpu_scc_cuf_k_caching_mod.F90 cloudsc_driver_gpu_scc_cuf_k_caching_mod.F90 - FLAGS "-Mcuda=maxregcount:128") + FLAGS "-cuda -gpu=maxregcount:128") ecbuild_add_executable( TARGET dwarf-cloudsc-gpu-scc-cuf-k-caching diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index 90a5abf0..0c55b731 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -571,13 +571,13 @@ if( HAVE_CUDA ) OUTPUT loki-scc-cuf-parametrise/cuf_cloudsc_driver_loki_mod.cuf_parametrise.F90 loki-scc-cuf-parametrise/cuf_cloudsc.cuf_parametrise.F90 - DEPENDS cuf_cloudsc.F90 cuf_cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + DEPENDS cuf_cloudsc.F90 cuf_cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} ) set_source_files_properties( loki-scc-cuf-parametrise/cuf_cloudsc_driver_loki_mod.cuf_parametrise.F90 loki-scc-cuf-parametrise/cuf_cloudsc.cuf_parametrise.F90 - PROPERTIES COMPILE_FLAGS "-Mcuda=maxregcount:128" + PROPERTIES COMPILE_FLAGS "-cuda -gpu=maxregcount:128" ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-cuf-parametrise @@ -590,8 +590,7 @@ if( HAVE_CUDA ) DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_CUF ) - # target_compile_definitions(dwarf-cloudsc-loki-scc-cuf-parametrise PUBLIC USE_CUDA_DRIVER=1) - target_link_options(dwarf-cloudsc-loki-scc-cuf-parametrise PUBLIC "-Mcuda") + target_link_options(dwarf-cloudsc-loki-scc-cuf-parametrise PUBLIC "-cuda") ecbuild_add_test( TARGET dwarf-cloudsc-loki-scc-cuf-parametrise-serial @@ -626,8 +625,8 @@ if( HAVE_CUDA ) set_source_files_properties( loki-scc-cuf-hoist/cuf_cloudsc_driver_loki_mod.cuf_hoist.F90 - loki-scc-cuf-hoist/cuf_cloudsc.cuf_hoist.F90 - PROPERTIES COMPILE_FLAGS "-Mcuda=maxregcount:128" + loki-scc-cuf-hoist/cuf_cloudsc.cuf_hoist.F90 + PROPERTIES COMPILE_FLAGS "-cuda -gpu=maxregcount:128" ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-cuf-hoist @@ -640,8 +639,7 @@ if( HAVE_CUDA ) DEFINITIONS ${CLOUDSC_DEFINITIONS} CLOUDSC_GPU_SCC_CUF ) - # target_compile_definitions(dwarf-cloudsc-loki-scc-cuf-hoist PUBLIC USE_CUDA_DRIVER=1) - target_link_options(dwarf-cloudsc-loki-scc-cuf-hoist PUBLIC "-Mcuda") + target_link_options(dwarf-cloudsc-loki-scc-cuf-hoist PUBLIC "-cuda") ecbuild_add_test( TARGET dwarf-cloudsc-loki-scc-cuf-hoist-serial diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 47fc8214..fd571521 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -44,7 +44,7 @@ list(APPEND CLOUDSC_CUDA_SOURCES if( HAVE_CUDA ) # ======================================================================== - # Compile CUDA fortran files with -MCuda. + # Compile CUDA fortran files with -cuda. # # This is necessary since CMake's CUDA languages does not natively # understand CUDA-Fortran (.cuf) yet. So we simply emulate .cuf with @@ -52,7 +52,7 @@ if( HAVE_CUDA ) # ======================================================================== cloudsc_add_compile_options( SOURCES ${CLOUDSC_CUDA_SOURCES} - FLAGS "-Mcuda=maxregcount:128" + FLAGS "-cuda -gpu=maxregcount:128" ) # Add CUDA-specific flags to the library if enabled @@ -98,5 +98,5 @@ ecbuild_add_library( TARGET cloudsc-common-lib ) if( HAVE_CUDA ) - target_link_options( cloudsc-common-lib INTERFACE "-Mcuda" ) + target_link_options( cloudsc-common-lib PUBLIC "-cuda" ) endif() diff --git a/src/common/module/cloudsc_field_state_mod.F90 b/src/common/module/cloudsc_field_state_mod.F90 index 18f9822e..345dcccb 100644 --- a/src/common/module/cloudsc_field_state_mod.F90 +++ b/src/common/module/cloudsc_field_state_mod.F90 @@ -27,8 +27,8 @@ MODULE CLOUDSC_FIELD_STATE_MOD TYPE CLOUDSC_FIELD_STATE INTEGER(KIND=JPIM) :: NPROMA, KLEV ! Grid points and vertical levels per block INTEGER(KIND=JPIM) :: NGPTOT, NBLOCKS ! Total number of grid points and blocks - INTEGER(KIND=JPIM) :: KFLDX - LOGICAL(KIND=JPLM) :: LDSLPHY + INTEGER(KIND=JPIM) :: KFLDX + LOGICAL(KIND=JPLM) :: LDSLPHY LOGICAL(KIND=JPLM) :: LDMAINCALL ! T if main call to cloudsc REAL(KIND=JPRB) :: PTSPHY ! Physics timestep @@ -273,7 +273,7 @@ SUBROUTINE FIELD_INIT_R1(FIELD, NPROMA,NBLOCKS) DO B=1, NBLOCKS FIELD(:,B) = 0.0_JPRB END DO -!$omp end parallel do +!$omp end parallel do END SUBROUTINE FIELD_INIT_R1 SUBROUTINE FIELD_INIT_R2(FIELD, NPROMA, NLEV, NBLOCKS) @@ -287,7 +287,7 @@ SUBROUTINE FIELD_INIT_R2(FIELD, NPROMA, NLEV, NBLOCKS) DO B=1, NBLOCKS FIELD(:,:,B) = 0.0_JPRB END DO -!$omp end parallel do +!$omp end parallel do END SUBROUTINE FIELD_INIT_R2 SUBROUTINE FIELD_INIT_R3(FIELD, NPROMA, NLEV, NDIM, NBLOCKS) @@ -301,7 +301,7 @@ SUBROUTINE FIELD_INIT_R3(FIELD, NPROMA, NLEV, NDIM, NBLOCKS) DO B=1, NBLOCKS FIELD(:,:,:,B) = 0.0_JPRB END DO -!$omp end parallel do +!$omp end parallel do END SUBROUTINE FIELD_INIT_R3 SUBROUTINE FIELD_INIT_STATE(STATE, BUFFER, NPROMA, NLEV, NDIM, NBLOCKS) @@ -325,7 +325,7 @@ SUBROUTINE FIELD_INIT_STATE(STATE, BUFFER, NPROMA, NLEV, NDIM, NBLOCKS) STATE(B)%Q => BUFFER(:,:,3,B) STATE(B)%CLD => BUFFER(:,:,4:NFIELDS,B) END DO -!$omp end parallel do +!$omp end parallel do END SUBROUTINE FIELD_INIT_STATE SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) diff --git a/src/common/module/yomphyder.F90 b/src/common/module/yomphyder.F90 index 811e1e64..18158348 100644 --- a/src/common/module/yomphyder.F90 +++ b/src/common/module/yomphyder.F90 @@ -63,7 +63,7 @@ module yomphyder REAL(KIND=JPRB), dimension(:), pointer :: PGAW ! GAUSSIAN WEIGHTS - Reduced Grid - ~ grid box area REAL(KIND=JPRB), dimension(:), pointer :: PCLON, PSLON ! cosine, sine of longitude REAL(KIND=JPRB), dimension(:), pointer :: PMU0, PMU0M ! local cosine of instantaneous (mean) solar zenith angle - REAL(KIND=JPRB), dimension(:), pointer :: PGEMU ! sine of latitude + REAL(KIND=JPRB), dimension(:), pointer :: PGEMU ! sine of latitude REAL(KIND=JPRB), dimension(:), pointer :: POROG ! orography REAL(KIND=JPRB), dimension(:), pointer :: PGNORDL,PGNORDM ! Longitudial/latitudial derivatives of orography REAL(KIND=JPRB), dimension(:), pointer :: PGSQM2 @@ -73,7 +73,7 @@ module yomphyder REAL(KIND=JPRB), dimension(:,:), pointer :: PSP_SG, PSP_SL, PSP_RR, PSP_X2, PSD_WS, PSD_VF, & & PSD_VN, PSD_V2, PSD_VD, PSD_X2, PSD_WW REAL(KIND=JPRB), dimension(:,:,:), pointer :: PSP_OM, PSP_SB, PSP_EP, PSD_V3, PSD_XA - REAL(KIND=JPRB), dimension(:,:,:), pointer :: PEXTRD + REAL(KIND=JPRB), dimension(:,:,:), pointer :: PEXTRD REAL(KIND=JPRB), dimension(:,:), pointer :: PCOVPTOT !Precip fraction REAL(KIND=JPRB), dimension(:), pointer :: PQCFL ! T star tiles @@ -82,7 +82,7 @@ module yomphyder REAL(KIND=JPRB), dimension(:,:), pointer :: PAHFSTI ! (INSTANTANEOUS) SURFACE SENSIBLE HEAT FLUX FOR EACH TILE REAL(KIND=JPRB), dimension(:,:), pointer :: PEVAPTI ! (INSTANTANEOUS) EVAPORATION FOR EACH TILE REAL(KIND=JPRB), dimension(:,:), pointer :: PTSKTI ! SKIN TEMPERATURE FOR EACH TILE - ! other + ! other REAL(KIND=JPRB), dimension(:), pointer :: PEMIS ! MODEL SURFACE LONGWAVE EMISSIVITY. ! GPP/REC flux adjustment coefficients REAL(KIND=JPRB), dimension(:), pointer :: PCGPP, PCREC ! to store bias correction coefficients @@ -109,7 +109,7 @@ module yomphyder REAL(KIND=JPRB), dimension(:), pointer :: PTLMNWE1 ! tendency of lake totat layer temperature REAL(KIND=JPRB), dimension(:), pointer :: PTLWMLE1 ! tendency of lake mixed layer temperature REAL(KIND=JPRB), dimension(:), pointer :: PTLBOTE1 ! tendency of lake bottom layer temperature - REAL(KIND=JPRB), dimension(:), pointer :: PTLSFE1 ! tendency of lake shape factor - + REAL(KIND=JPRB), dimension(:), pointer :: PTLSFE1 ! tendency of lake shape factor - REAL(KIND=JPRB), dimension(:), pointer :: PHLICEE1 ! tendency of lake ice depth m REAL(KIND=JPRB), dimension(:), pointer :: PHLMLE1 ! tendency of lake mixed layer depth m/s end type surf_and_more_type @@ -117,11 +117,11 @@ module yomphyder type perturb_in_type REAL(KIND=JPRB), dimension(:), pointer :: PSTOPHU,PSTOPHV,PSTOPHT,PSTOPHQ ! random number for defining stochastic ! perturbation for U, V, T, and Q diabatic tendency. - REAL(KIND=JPRB), dimension(:), pointer :: PSTOPHCA ! CA pattern + REAL(KIND=JPRB), dimension(:), pointer :: PSTOPHCA ! CA pattern REAL(KIND=JPRB), dimension(:,:), pointer :: PGP2DSDT REAL(KIND=JPRB), dimension(:,:), pointer :: PVORT, PVORTGRADX, PVORTGRADY ! vorticity and its horizontal gradients REAL(KIND=JPRB), dimension(:,:), pointer :: PTOTDISS_SMOOTH ! smoothed total dissipation rate - REAL(KIND=JPRB), dimension(:,:), pointer :: PFORCEU, PFORCEV, PFORCET, PFORCEQ ! nonlinear stochastic forcing terms + REAL(KIND=JPRB), dimension(:,:), pointer :: PFORCEU, PFORCEV, PFORCET, PFORCEQ ! nonlinear stochastic forcing terms end type perturb_in_type @@ -144,7 +144,7 @@ module yomphyder REAL(KIND=JPRB), dimension(:,:), pointer :: PTRSC ! (KLON,0:KLEV) Clear-sky shortwave transmissivity REAL(KIND=JPRB), dimension(:,:,:), pointer :: PTAUAER ! (KLON,KLEV,6) OPTICAL THICKNESS FOR 6 AEROSOL TYPES REAL(KIND=JPRB), dimension(:), pointer :: PSRLWD ! (KLON) Surface downward longwave flux - REAL(KIND=JPRB), dimension(:), pointer :: PSRLWDC ! (KLON) SURFACE DOWNWARD CLEAR-SKY LONGWAVE + REAL(KIND=JPRB), dimension(:), pointer :: PSRLWDC ! (KLON) SURFACE DOWNWARD CLEAR-SKY LONGWAVE REAL(KIND=JPRB), dimension(:), pointer :: PSRSWD ! (KLON) SURFACE SHORTWAVE DOWNWARDS REAL(KIND=JPRB), dimension(:), pointer :: PSRSWDC ! (KLON) SURFACE DOWNWARD CLEAR-SKY SHORTWAVE REAL(KIND=JPRB), dimension(:), pointer :: PSRSWDCS ! (KLON) SURFACE NET SHORTWAVE CLEAR-SKY @@ -186,7 +186,7 @@ module yomphyder ! 3D DIAGNOSTICS FOR ERA40 REAL(KIND=JPRB), dimension(:,:), pointer :: PMFUDE_RATE ! UD detrainmnet rate (KG/(M3*S)) REAL(KIND=JPRB), dimension(:,:), pointer :: PMFDDE_RATE ! DD detrainmnet rate (KG/(M3*S)) - REAL(KIND=JPRB), dimension(:,:), pointer :: PKH_VDF ! turbulent diffusion coefficient for heat + REAL(KIND=JPRB), dimension(:,:), pointer :: PKH_VDF ! turbulent diffusion coefficient for heat ! array for precipitation fraction REAL(KIND=JPRB), dimension(:,:), pointer :: PCOVPTOT ! PRECIPITATION FRACTION IN EACH LAYER ! Convection and PBL types @@ -305,13 +305,13 @@ module yomphyder REAL(KIND=JPRB), dimension(:,:), pointer :: ZFRSOTI REAL(KIND=JPRB), dimension(:,:), pointer :: ZAHFTRTI REAL(KIND=JPRB), dimension(:,:), pointer :: ZALBD,ZALBP ! KLON,NTSW - ! CTESSEL: Carbon model + ! CTESSEL: Carbon model REAL(KIND=JPRB), dimension(:,:), pointer :: ZANDAYVT, ZANFMVT REAL(KIND=JPRB), dimension(:,:,:),pointer :: ZDHVEGS end type surf_and_more_local_type type aux_diag_local_type - INTEGER(KIND=JPIM), pointer :: IEXT3D ! position in extra field for diagnostics + INTEGER(KIND=JPIM), pointer :: IEXT3D ! position in extra field for diagnostics REAL(KIND=JPRB), dimension(:), pointer :: ZWND ! horizontal wind in the lowest model level REAL(KIND=JPRB), dimension(:), pointer :: ZCCNL, ZCCNO INTEGER(KIND=JPIM), dimension(:), pointer :: ITOPC, IBASC, IBOTSC @@ -327,8 +327,8 @@ module yomphyder REAL(KIND=JPRB), dimension(:,:), pointer :: ZSOTEV ! Explicit part of V-tendency from subgrid orography scheme REAL(KIND=JPRB), dimension(:,:), pointer :: ZSOBETA ! Implicit part of subgrid orography ! aerosols in microphysics - REAL(KIND=JPRB), dimension(:,:), pointer :: ZLCRIT_AER ! critical liquid mmr for rain autoconversion process - REAL(KIND=JPRB), dimension(:,:), pointer :: ZICRIT_AER ! critical liquid mmr for snow autoconversion process + REAL(KIND=JPRB), dimension(:,:), pointer :: ZLCRIT_AER ! critical liquid mmr for rain autoconversion process + REAL(KIND=JPRB), dimension(:,:), pointer :: ZICRIT_AER ! critical liquid mmr for snow autoconversion process REAL(KIND=JPRB), dimension(:,:), pointer :: ZRE_LIQ ! effective radius liquid REAL(KIND=JPRB), dimension(:,:), pointer :: ZRE_ICE ! effective radius ice REAL(KIND=JPRB), dimension(:,:), pointer :: ZCCN ! CCN (prognostic, diagnostic) @@ -352,4 +352,3 @@ module yomphyder end type keys_local_type end module yomphyder - From 783fe1e3a210e5136d379e83b971af0d4e1e4f12 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 11 Jul 2023 10:21:20 +0100 Subject: [PATCH 044/174] Remove CMP0077 policy (use CACHE variables instead) --- CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b159d88..e6f795b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,9 +9,6 @@ # define minimum version of cmake required cmake_minimum_required( VERSION 3.17 FATAL_ERROR ) -# Disable warnings about setting `ENABLE_ACC` variable for ecbuild_add_option -cmake_policy( SET CMP0077 NEW ) - find_package( ecbuild REQUIRED ) # define the project From 3c6d73a1a5b082866eb1817a16a548f400ed88ed Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 11 Jul 2023 10:21:37 +0100 Subject: [PATCH 045/174] Version guard the OpenACC hackery --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e6f795b4..64403990 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,7 +28,7 @@ endif() include( cloudsc_compile_options ) ### OpenACC -if( NOT DEFINED ENABLE_ACC OR ENABLE_ACC ) +if( ${CMAKE_VERSION} VERSION_LESS "3.25" AND (NOT DEFINED ENABLE_ACC OR ENABLE_ACC ) ) # Incredibly inconvenient: FindOpenACC does _not_ set OpenACC_FOUND, only # the language-specific components OpenACC_Fortran_FOUND and OpenACC_C_FOUND. # This means, even internally CMake considers OpenACC as not found. @@ -37,6 +37,7 @@ if( NOT DEFINED ENABLE_ACC OR ENABLE_ACC ) # the result, and then, trigger a second find_package via ecbuild_add_option. # This then conveniently takes the previously set OpenACC_FOUND into account # and rectifies CMake's internal bookkeeping in the process. + # This has been fixed in CMake 3.25 find_package( OpenACC ) if( OpenACC_Fortran_FOUND AND OpenACC_C_FOUND ) set( OpenACC_FOUND ON ) From d286df1c8d2015bbc84071c10f9cd857683dc27f Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 11 Jul 2023 10:52:10 +0100 Subject: [PATCH 046/174] Add NVHPC 23.5 to Github actions --- .github/scripts/bootstrap-nvhpc.sh | 3 +- .github/workflows/build.yml | 24 ++++++++++++++ arch/github/ubuntu/nvhpc/23.5/env.sh | 33 +++++++++++++++++++ arch/github/ubuntu/nvhpc/23.5/toolchain.cmake | 1 + 4 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 arch/github/ubuntu/nvhpc/23.5/env.sh create mode 120000 arch/github/ubuntu/nvhpc/23.5/toolchain.cmake diff --git a/.github/scripts/bootstrap-nvhpc.sh b/.github/scripts/bootstrap-nvhpc.sh index c099df98..e3d17099 100755 --- a/.github/scripts/bootstrap-nvhpc.sh +++ b/.github/scripts/bootstrap-nvhpc.sh @@ -2,7 +2,8 @@ set -euo pipefail set -x -nvhpc_version=21.9 +# Set nvhpc version to default value if unset +: "${nvhpc_version:=21.9}" # Use Atlas' nvhpc installation script wget https://raw.githubusercontent.com/ecmwf/atlas/develop/tools/install-nvhpc.sh diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5b634e3c..27a9e6dc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,6 +51,7 @@ jobs: include: # Add nvhpc build configurations with serialbox and HDF5 - arch: github/ubuntu/nvhpc/21.9 + nvhpc_version: 21.9 io_library_flag: '' mpi_flag: '' prec_flag: '' @@ -60,6 +61,27 @@ jobs: pyiface_flag: '' python_f2py_flag: '' - arch: github/ubuntu/nvhpc/21.9 + nvhpc_version: 21.9 + io_library_flag: '--with-serialbox' + mpi_flag: '' + prec_flag: '' + gpu_flag: '--with-gpu' + cuda_flag: '--with-cuda' + loki_flag: '--with-loki' + pyiface_flag: '' + python_f2py_flag: '' + - arch: github/ubuntu/nvhpc/23.5 + nvhpc_version: 23.5 + io_library_flag: '' + mpi_flag: '' + prec_flag: '' + gpu_flag: '--with-gpu' + cuda_flag: '--with-cuda' + loki_flag: '--with-loki' + pyiface_flag: '' + python_f2py_flag: '' + - arch: github/ubuntu/nvhpc/23.5 + nvhpc_version: 23.5 io_library_flag: '--with-serialbox' mpi_flag: '' prec_flag: '' @@ -97,6 +119,8 @@ jobs: # Install Compiler - name: Install nvhpc if: contains( matrix.arch, 'nvhpc' ) + env: + nvhpc_version: ${{ matrix.nvhpc_version }} run: .github/scripts/bootstrap-nvhpc.sh # Install HDF5 diff --git a/arch/github/ubuntu/nvhpc/23.5/env.sh b/arch/github/ubuntu/nvhpc/23.5/env.sh new file mode 100644 index 00000000..821b37b8 --- /dev/null +++ b/arch/github/ubuntu/nvhpc/23.5/env.sh @@ -0,0 +1,33 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +### Variables +export NVHPC_INSTALL_DIR=${GITHUB_WORKSPACE}/nvhpc-install +export NVHPC_VERSION=23.5 +export NVHPC_DIR=${NVHPC_INSTALL_DIR}/Linux_x86_64/${NVHPC_VERSION} + +### Compilers +export PATH=${NVHPC_DIR}/compilers/bin:${PATH} +export NVHPC_LIBRARY_PATH=${NVHPC_DIR}/compilers/lib +export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH} + +### MPI +export MPI_HOME=${NVHPC_DIR}/comm_libs/mpi +export PATH=${MPI_HOME}/bin:${PATH} + +### HDF5 +export HDF5_DIR=${GITHUB_WORKSPACE}/hdf5-install +export LD_LIBRARY_PATH=${HDF5_DIR}/lib:${LD_LIBRARY_PATH} +export PATH=${HDF5_DIR}/bin:${PATH} + +### Compiler variables +export CC=nvc +export CXX=nvc++ +export FC=nvfortran diff --git a/arch/github/ubuntu/nvhpc/23.5/toolchain.cmake b/arch/github/ubuntu/nvhpc/23.5/toolchain.cmake new file mode 120000 index 00000000..2fd38d62 --- /dev/null +++ b/arch/github/ubuntu/nvhpc/23.5/toolchain.cmake @@ -0,0 +1 @@ +../../../../toolchains/github-ubuntu-nvhpc.cmake \ No newline at end of file From a35fb90f7eadf32e8aeff6d8d297e10161642802 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 11 Jul 2023 14:05:43 +0100 Subject: [PATCH 047/174] Free runner disk space for NVHPC --- .github/workflows/build.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 27a9e6dc..5d8777ec 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -116,6 +116,24 @@ jobs: if: contains( matrix.mpi_flag, 'with-mpi' ) run: sudo apt-get install libopenmpi-dev + # Free up disk space for nvhpc + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + if: contains( matrix.arch, 'nvhpc' ) + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + # Install Compiler - name: Install nvhpc if: contains( matrix.arch, 'nvhpc' ) From e7408e895461de65680ed31b27d9c691f7f563f3 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 11 Jul 2023 14:30:07 +0100 Subject: [PATCH 048/174] Choose pgf90 for HDF5 build --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5d8777ec..8e1e3292 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -148,7 +148,9 @@ jobs: - name: Install HDF5 from source if: contains( matrix.arch, 'nvhpc' ) && ! contains( matrix.io_library_flag, 'with-serialbox' ) - run: source arch/${{ matrix.arch }}/env.sh && .github/scripts/install-hdf5.sh + run: | + source arch/${{ matrix.arch }}/env.sh + FC=pgf90 .github/scripts/install-hdf5.sh # Install Boost - name: Install Boost libraries From 17cdd05174a85d03675633b3400c6f346939bd54 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 17 Jul 2023 17:52:09 +0100 Subject: [PATCH 049/174] Increase stack size on Github runner --- arch/github/ubuntu/nvhpc/21.9/env.sh | 3 +++ arch/github/ubuntu/nvhpc/23.5/env.sh | 3 +++ 2 files changed, 6 insertions(+) diff --git a/arch/github/ubuntu/nvhpc/21.9/env.sh b/arch/github/ubuntu/nvhpc/21.9/env.sh index 948b45a2..de7afa2f 100644 --- a/arch/github/ubuntu/nvhpc/21.9/env.sh +++ b/arch/github/ubuntu/nvhpc/21.9/env.sh @@ -33,3 +33,6 @@ export CXX=pgc++ export FC=pgf90 export ECBUILD_TOOLCHAIN="./toolchain.cmake" + +# Increase stack size to maximum +ulimit -S -s unlimited diff --git a/arch/github/ubuntu/nvhpc/23.5/env.sh b/arch/github/ubuntu/nvhpc/23.5/env.sh index 821b37b8..dfbac6e0 100644 --- a/arch/github/ubuntu/nvhpc/23.5/env.sh +++ b/arch/github/ubuntu/nvhpc/23.5/env.sh @@ -31,3 +31,6 @@ export PATH=${HDF5_DIR}/bin:${PATH} export CC=nvc export CXX=nvc++ export FC=nvfortran + +# Increase stack size to maximum +ulimit -S -s unlimited From 852c0872c05908d8c3501ac9b164f266941f5054 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 17 Jul 2023 17:53:50 +0100 Subject: [PATCH 050/174] Source env.sh before running --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e1e3292..7ba2a74a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -190,4 +190,6 @@ jobs: mpi_flag: ${{ matrix.mpi_flag }} arch: ${{ matrix.arch }} if: ${{ matrix.prec_flag == '' }} - run: .github/scripts/run-targets.sh + run: | + source arch/${{ matrix.arch }}/env.sh + .github/scripts/run-targets.sh From 6e730725a27c8d0c8663cd159b419a07d52e69a4 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 13:12:10 +0100 Subject: [PATCH 051/174] Fix wrong binary name in test --- src/cloudsc_fortran_atlas/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cloudsc_fortran_atlas/CMakeLists.txt b/src/cloudsc_fortran_atlas/CMakeLists.txt index dc21c6ae..87e6d137 100644 --- a/src/cloudsc_fortran_atlas/CMakeLists.txt +++ b/src/cloudsc_fortran_atlas/CMakeLists.txt @@ -44,7 +44,7 @@ endif() ecbuild_add_test( TARGET dwarf-cloudsc-fortran-atlas-serial - COMMAND bin/dwarf-cloudsc-atlas-fortran + COMMAND bin/dwarf-cloudsc-fortran-atlas ARGS 1 100 16 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 From 37ae6739acd937b4f69b4bb0ee0451d42f735b15 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 13:12:38 +0100 Subject: [PATCH 052/174] Disable atlas tests if executable is not built --- src/cloudsc_fortran_atlas/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cloudsc_fortran_atlas/CMakeLists.txt b/src/cloudsc_fortran_atlas/CMakeLists.txt index 87e6d137..f1e61123 100644 --- a/src/cloudsc_fortran_atlas/CMakeLists.txt +++ b/src/cloudsc_fortran_atlas/CMakeLists.txt @@ -27,7 +27,6 @@ if( HAVE_CLOUDSC_FORTRAN_ATLAS ) atlas_f DEFINITIONS ${CLOUDSC_DEFINITIONS} ) -endif() # Create symlink for the input data if( HAVE_SERIALBOX ) @@ -75,3 +74,5 @@ endif() OMP 4 CONDITION HAVE_OMP AND HAVE_MPI ) + +endif() From 245899215e0b96f7d3733b1e7671424f7387a93c Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 13:12:57 +0100 Subject: [PATCH 053/174] Check if MPI was already initialized --- src/common/module/cloudsc_mpi_mod.F90 | 31 ++++++++++++++++----------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/common/module/cloudsc_mpi_mod.F90 b/src/common/module/cloudsc_mpi_mod.F90 index 3d011545..215de33c 100644 --- a/src/common/module/cloudsc_mpi_mod.F90 +++ b/src/common/module/cloudsc_mpi_mod.F90 @@ -61,21 +61,28 @@ subroutine cloudsc_mpi_init(numomp) integer(kind=jpim), intent(in), optional :: numomp ! number of OpenMP threads #ifdef HAVE_MPI integer(kind=jpim) :: ierror, iprovided, irequired ! MPI status variables - - ! request threading support if multiple OpenMP threads are used - iprovided = mpi_thread_single - irequired = mpi_thread_single - if (present(numomp)) then - if (numomp > 1) then - irequired = mpi_thread_multiple + logical :: linit + + ! check if MPI has already been initialized + call mpi_initialized(linit, ierror) + if (ierror /= 0) call abor1('cloudsc_mpi: mpi_initialized failed') + + if (.not. linit) then + ! request threading support if multiple OpenMP threads are used + iprovided = mpi_thread_single + irequired = mpi_thread_single + if (present(numomp)) then + if (numomp > 1) then + irequired = mpi_thread_multiple + end if end if - end if - call mpi_init_thread(irequired, iprovided, ierror) + call mpi_init_thread(irequired, iprovided, ierror) - if (ierror /= 0) call abor1('cloudsc_mpi: mpi_init_thread failed') - if (iprovided < irequired) then - print *, "WARNING: MPI_INIT_THREAD reports insufficient threading support" + if (ierror /= 0) call abor1('cloudsc_mpi: mpi_init_thread failed') + if (iprovided < irequired) then + print *, "WARNING: MPI_INIT_THREAD reports insufficient threading support" + end if end if ! determine communicator size and local rank From c41a0c40eadb588746eac2f80fb6219cf31ae290 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 13:42:39 +0100 Subject: [PATCH 054/174] Add eckit, fckit, atlas as optional bundle dependencies --- bundle.yml | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/bundle.yml b/bundle.yml index c087d131..57230a55 100644 --- a/bundle.yml +++ b/bundle.yml @@ -5,11 +5,15 @@ name : cloudsc-bundle version : 1.0.0-develop cmake : > CMAKE_LINK_DEPENDS_NO_SHARED=ON + BUILD_field_api=OFF + BUILD_eckit=OFF + BUILD_fckit=OFF + BUILD_atlas=OFF projects : - ecbuild : - git : https://github.com/ecmwf/ecbuild + git : https://github.com/ecmwf/ecbuild version : 3.7.0 bundle : false @@ -36,13 +40,37 @@ projects : LOKI_ENABLE_TESTS=OFF LOKI_ENABLE_NO_INSTALL=ON + - eckit : + git : https://github.com/ecmwf/eckit + version : 1.24.3 + optional: true + require : ecbuild + cmake : > + ECKIT_ENABLE_TESTS=OFF + ECKIT_ENABLE_BUILD_TOOLS=OFF + + - fckit : + git : https://github.com/ecmwf/fckit + version : 0.9.0 + optional: true + require : ecbuild eckit + cmake : > + FCKIT_ENABLE_TESTS=OFF + + - atlas : + git : https://github.com/ecmwf/atlas + version : master + optional: true + require : ecbuild eckit fckit + cmake : > + ATLAS_ENABLE_TESTS=OFF + - field_api : git : ${BITBUCKET}/rdx/field_api version : master optional: true require : ecbuild cmake : > - BUILD_field_api=OFF ENABLE_FIELD_API_TESTS=OFF ENABLE_FIELD_API_FIAT_BUILD=OFF FIELD_API_UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module @@ -111,6 +139,21 @@ options : cmake : > CLOUDSC_PYTHON_F2PY=ON + - with-atlas : + help : Build Atlas and its dependencies (eckit, fckit) and enable Atlas-based variants of CLOUDSC + cmake : > + BUILD_eckit=ON + BUILD_fckit=ON + BUILD_atlas=ON + + - with-dependency-tests : + help : Build and enable tests for CLOUDSC dependencies that are build as part of the bundle (eckit, fckit, Atlas, Loki) + cmake : > + LOKI_ENABLE_TESTS=ON + ECKIT_ENABLE_TESTS=ON + FCKIT_ENABLE_TESTS=ON + ATLAS_ENABLE_TESTS=ON + - cloudsc-prototype1 : help : Build the original operational Fortran prototype [ON|OFF] cmake : ENABLE_CLOUDSC_PROTOTYPE1={{value}} From 95aaef3654d9b42279240065763c9c462650b321 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 13:52:01 +0100 Subject: [PATCH 055/174] Add Atlas variant to Github actions --- .github/scripts/verify-targets.sh | 7 ++++++- .github/workflows/build.yml | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 7040502d..634f41eb 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -17,7 +17,7 @@ fi if [[ "$gpu_flag" == "--with-gpu" ]] then - targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) + targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) targets+=(dwarf-cloudsc-gpu-omp-scc-hoist) if [[ "$claw_flag" == "--with-claw" ]] then @@ -52,6 +52,11 @@ then fi fi +if [[ "$atlas_flag" == "--with-atlas" ]] +then + targets+=(dwarf-cloudsc-fortran-atlas) +fi + if [[ "$pyiface_flag" == "--cloudsc-fortran-pyiface=ON" ]] then targets+=(cloudsc_pyiface.py) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5b634e3c..99aded56 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,6 +42,8 @@ jobs: loki_flag: ['', '--with-loki'] # Loki source-to-source translation enabled + atlas_flag: ['--with-atlas'] # Variant using Atlas-managed fields + claw_flag: [''] # Flag to enable CLAW-generated variants pyiface_flag: [''] # Flag to enable Python-interface variant @@ -57,6 +59,7 @@ jobs: gpu_flag: '--with-gpu' cuda_flag: '--with-cuda' loki_flag: '--with-loki' + atlas_flag: '--with-atlas' pyiface_flag: '' python_f2py_flag: '' - arch: github/ubuntu/nvhpc/21.9 @@ -66,6 +69,7 @@ jobs: gpu_flag: '--with-gpu' cuda_flag: '--with-cuda' loki_flag: '--with-loki' + atlas_flag: '--with-atlas' pyiface_flag: '' python_f2py_flag: '' # Add pyiface build configuration for HDF5 only @@ -76,6 +80,7 @@ jobs: gpu_flag: '' cuda_flag: '' loki_flag: '' + atlas_flag: '' pyiface_flag: '--cloudsc-fortran-pyiface=ON' python_f2py_flag: '--cloudsc-python-f2py=ON' @@ -124,7 +129,7 @@ jobs: --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \ ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \ ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} \ - ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} + ${{ matrix.atlas_flag }} ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} # Verify targets exist - name: Verify targets @@ -134,6 +139,7 @@ jobs: gpu_flag: ${{ matrix.gpu_flag }} cuda_flag: ${{ matrix.cuda_flag }} loki_flag: ${{ matrix.loki_flag }} + atlas_flag: ${{ matrix.atlas_flag }} claw_flag: ${{ matrix.claw_flag }} pyiface_flag: ${{ matrix.pyiface_flag }} python_f2py_flag: ${{ matrix.python_f2py_flag }} From 596098959e0525f563eecb36832e1ee4299b0756 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 14:58:54 +0100 Subject: [PATCH 056/174] Allow for Atlas binaries in Github CI --- .github/scripts/verify-targets.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 634f41eb..7e29efa2 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -55,6 +55,9 @@ fi if [[ "$atlas_flag" == "--with-atlas" ]] then targets+=(dwarf-cloudsc-fortran-atlas) + # Atlas builds a number of binaries that end up in bin, too: + targets+=(atlas atlas-atest-mgrids atlas-gaussian-latitudes atlas-grids) + targets+=(atlas-io-list atlas-meshgen fckit) fi if [[ "$pyiface_flag" == "--cloudsc-fortran-pyiface=ON" ]] From 8620e5e4822038aa087aced782f2b947e92bd81a Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 18 Jul 2023 15:47:43 +0100 Subject: [PATCH 057/174] Reduce NPROMA for idem-stack target --- .github/scripts/run-targets.sh | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh index 1cfa9f11..8b3f29b3 100755 --- a/.github/scripts/run-targets.sh +++ b/.github/scripts/run-targets.sh @@ -11,7 +11,7 @@ skipped_targets=(dwarf-cloudsc-gpu-claw) if [[ "$arch" == *"nvhpc"* ]] then # Skip GPU targets if built with nvhpc (don't have GPU in test runner) - skipped_targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) + skipped_targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) skipped_targets+=(dwarf-cloudsc-gpu-omp-scc-hoist dwarf-cloudsc-gpu-scc-field) # Skip GPU targets from Loki if built with nvhpc (don't have GPU in test runner) @@ -41,11 +41,21 @@ do continue fi + if [[ "$target" == "dwarf-cloudsc-loki-idem-stack" ]] + then + # The CPU-variant of the stack causes segfaults with NVHPC for NPROMA>32 + # in __c_mset16_avx, possibly because this enables some vectorized + # code path with longer vector lengths that cause out-of-bounds reads/writes. + nproma=16 + else + nproma=64 + fi + if [[ "$mpi_flag" == "--with-mpi" && ! " ${non_mpi_targets[*]} " =~ " $target " ]] then # Two ranks with one thread each, safe NPROMA # NB: Use oversubscribe to run, even if we end up on a single core agent - mpirun --oversubscribe -np 2 bin/$target 1 100 64 + mpirun --oversubscribe -np 2 bin/$target 1 100 $nproma elif [[ "$target" == "cloudsc_pyiface.py" ]] then bin/$target --numomp 1 --ngptot 100 --nproma 64 @@ -54,7 +64,7 @@ do bin/$target --ngptot 100 --nproma 128 else # Single thread, safe NPROMA - bin/$target 1 100 64 + bin/$target 1 100 $nproma fi exit_code=$((exit_code + $?)) done From 89cd9076a2d96ec7386a9630709b1d6a26503ad8 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Thu, 20 Jul 2023 13:47:17 +0100 Subject: [PATCH 058/174] Use CTest to run targets --- .github/workflows/build.yml | 27 +++++++++++++++++++-------- src/cloudsc_loki/CMakeLists.txt | 2 +- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7ba2a74a..b63b54c7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,6 +48,8 @@ jobs: python_f2py_flag: [''] # Flag to enable Python variants + ctest_exclude_pattern: ['-scc-hoist-'] # Regex to disable CTest tests + include: # Add nvhpc build configurations with serialbox and HDF5 - arch: github/ubuntu/nvhpc/21.9 @@ -60,6 +62,7 @@ jobs: loki_flag: '--with-loki' pyiface_flag: '' python_f2py_flag: '' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: github/ubuntu/nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '--with-serialbox' @@ -70,6 +73,7 @@ jobs: loki_flag: '--with-loki' pyiface_flag: '' python_f2py_flag: '' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: github/ubuntu/nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' @@ -80,6 +84,7 @@ jobs: loki_flag: '--with-loki' pyiface_flag: '' python_f2py_flag: '' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: github/ubuntu/nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '--with-serialbox' @@ -90,6 +95,7 @@ jobs: loki_flag: '--with-loki' pyiface_flag: '' python_f2py_flag: '' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE # Add pyiface build configuration for HDF5 only - arch: github/ubuntu/gnu/9.4.0 io_library_flag: '' @@ -183,13 +189,18 @@ jobs: python_f2py_flag: ${{ matrix.python_f2py_flag }} run: .github/scripts/verify-targets.sh - # Run double-precision targets - # (Mind the exclusions inside the script!) - - name: Run targets - env: - mpi_flag: ${{ matrix.mpi_flag }} - arch: ${{ matrix.arch }} + # Run ctest + - name: Run CTest if: ${{ matrix.prec_flag == '' }} + working-directory: ./build run: | - source arch/${{ matrix.arch }}/env.sh - .github/scripts/run-targets.sh + source env.sh + ctest -O ctest.log --output-on-failure -E "${{ matrix.ctest_exclude_pattern }}" + + # Upload test output + - name: Archive CTest output + uses: actions/upload-artifact@v3 + if: ${{ matrix.prec_flag == '' }} + with: + name: ctest-log + path: build/ctest.log diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index 0c55b731..c7b2c60e 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -710,7 +710,7 @@ endif() ARGS 4 100 16 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 4 - CONDITION HAVE_MPI + CONDITION HAVE_OMP ) endif() From b64b6fade36b315d7f07eda756590823f4a23e77 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 24 Jul 2023 12:58:30 +0100 Subject: [PATCH 059/174] Remove redundant run-targets script --- .github/scripts/run-targets.sh | 72 ---------------------------------- 1 file changed, 72 deletions(-) delete mode 100755 .github/scripts/run-targets.sh diff --git a/.github/scripts/run-targets.sh b/.github/scripts/run-targets.sh deleted file mode 100755 index 8b3f29b3..00000000 --- a/.github/scripts/run-targets.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -set -eu -set -x - -# These targets don't have an MPI-parallel driver routine -non_mpi_targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-c) - -# These targets currently cause issues and are therefore not tested -skipped_targets=(dwarf-cloudsc-gpu-claw) - -if [[ "$arch" == *"nvhpc"* ]] -then - # Skip GPU targets if built with nvhpc (don't have GPU in test runner) - skipped_targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) - skipped_targets+=(dwarf-cloudsc-gpu-omp-scc-hoist dwarf-cloudsc-gpu-scc-field) - - # Skip GPU targets from Loki if built with nvhpc (don't have GPU in test runner) - skipped_targets+=(dwarf-cloudsc-loki-claw-gpu dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist) - - # Skip CUDA targets if built with nvhpc - skipped_targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching) - skipped_targets+=(dwarf-cloudsc-loki-scc-cuf-hoist dwarf-cloudsc-loki-scc-cuf-parametrise) - skipped_targets+=(dwarf-cloudsc-cuda dwarf-cloudsc-cuda-hoist dwarf-cloudsc-cuda-k-caching) - - # Skip C target if built with nvhpc, segfaults for unknown reasons - skipped_targets+=(dwarf-cloudsc-c dwarf-cloudsc-loki-c) -fi - -exit_code=0 -cd build - -# -# Run each of the binaries with a safe NPROMA value and validate exit codes -# - -for target in $(ls bin) -do - # Skip some targets - if [[ " ${skipped_targets[*]} " =~ " $target " ]] - then - continue - fi - - if [[ "$target" == "dwarf-cloudsc-loki-idem-stack" ]] - then - # The CPU-variant of the stack causes segfaults with NVHPC for NPROMA>32 - # in __c_mset16_avx, possibly because this enables some vectorized - # code path with longer vector lengths that cause out-of-bounds reads/writes. - nproma=16 - else - nproma=64 - fi - - if [[ "$mpi_flag" == "--with-mpi" && ! " ${non_mpi_targets[*]} " =~ " $target " ]] - then - # Two ranks with one thread each, safe NPROMA - # NB: Use oversubscribe to run, even if we end up on a single core agent - mpirun --oversubscribe -np 2 bin/$target 1 100 $nproma - elif [[ "$target" == "cloudsc_pyiface.py" ]] - then - bin/$target --numomp 1 --ngptot 100 --nproma 64 - elif [[ "$target" == "cloudsc_f2py.py" ]] - then - bin/$target --ngptot 100 --nproma 128 - else - # Single thread, safe NPROMA - bin/$target 1 100 $nproma - fi - exit_code=$((exit_code + $?)) -done - -exit $exit_code From df29dc80789c5063ab05536f66f11fca3b89e88e Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 24 Jul 2023 13:39:50 +0100 Subject: [PATCH 060/174] Reduce CI matrix explosion slightly --- .github/scripts/verify-targets.sh | 26 ++++----- .github/workflows/build.yml | 96 ++++++++++--------------------- 2 files changed, 42 insertions(+), 80 deletions(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 7040502d..62279096 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -15,25 +15,25 @@ then targets+=(dwarf-cloudsc-c) fi -if [[ "$gpu_flag" == "--with-gpu" ]] +if [[ "$build_flags" == *"--with-gpu"* ]] then - targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) + targets+=(dwarf-cloudsc-gpu-scc dwarf-cloudsc-gpu-scc-hoist dwarf-cloudsc-gpu-scc-k-caching) targets+=(dwarf-cloudsc-gpu-omp-scc-hoist) - if [[ "$claw_flag" == "--with-claw" ]] + if [[ "$build_flags" == *"--with-claw"* ]] then targets+=(dwarf-cloudsc-gpu-claw) fi - if [[ "$cuda_flag" == "--with-cuda" ]] + if [[ "$build_flags" == *"--with-cuda"* ]] then targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching) - fi - if [[ "$cuda_flag" == "--with-cuda" && "$io_library_flag" == "--with-serialbox" ]] - then - targets+=(dwarf-cloudsc-cuda dwarf-cloudsc-cuda-hoist dwarf-cloudsc-cuda-k-caching) + if [[ "$io_library_flag" == "--with-serialbox" ]] + then + targets+=(dwarf-cloudsc-cuda dwarf-cloudsc-cuda-hoist dwarf-cloudsc-cuda-k-caching) + fi fi fi -if [[ "$loki_flag" == "--with-loki" ]] +if [[ "$build_flags" == *"--with-loki"* ]] then targets+=(dwarf-cloudsc-loki-idem dwarf-cloudsc-loki-sca) targets+=(dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist) @@ -42,22 +42,22 @@ then then targets+=(dwarf-cloudsc-loki-c) fi - if [[ "$claw_flag" == "--with-claw" ]] + if [[ "$build_flags" == *"--with-claw"* ]] then targets+=(dwarf-cloudsc-loki-claw-cpu dwarf-cloudsc-loki-claw-gpu) fi - if [[ "$cuda_flag" == "--with-cuda" ]] + if [[ "$build_flags" == *"--with-cuda"* ]] then targets+=(dwarf-cloudsc-loki-scc-cuf-hoist dwarf-cloudsc-loki-scc-cuf-parametrise) fi fi -if [[ "$pyiface_flag" == "--cloudsc-fortran-pyiface=ON" ]] +if [[ "$build_flags" == *"--cloudsc-fortran-pyiface=ON"* ]] then targets+=(cloudsc_pyiface.py) fi -if [[ "$python_f2py_flag" == "--cloudsc-python-f2py=ON" ]] +if [[ "$build_flags" == *"--cloudsc-python-f2py=ON"* ]] then targets+=(cloudsc_f2py.py) fi diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b63b54c7..03d934ff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ on: jobs: # This workflow contains a single job called "build" build: - name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.mpi_flag }} ${{ matrix.prec_flag }} ${{ matrix.gpu_flag }} ${{ matrix.loki_flag }} ${{ matrix.claw_flag }} ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} + name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.prec_flag }} ${{ matrix.build_flags }} # The type of runner that the job will run on runs-on: ubuntu-20.04 @@ -27,85 +27,54 @@ jobs: matrix: - arch: - - github/ubuntu/gnu/9.4.0 + arch: ['gnu/9.4.0'] # Default arch on Github is GNU 9.4.0 for now io_library_flag: ['', '--with-serialbox'] # Switch between Serialbox and HDF5 - mpi_flag: ['', '--with-mpi'] # Enable MPI-parallel build - prec_flag: ['', '--single-precision'] # Switch single/double precision - gpu_flag: ['', '--with-gpu'] # GPU-variants enabled - - cuda_flag: [''] # Enable CUDA variants + build_flags: + - '' # Plain build without any options + - '--with-gpu --with-loki' # Enable Loki and GPU variants + - '--with-gpu --with-loki --with-mpi' # Enable Loki and GPU variants with MPI - loki_flag: ['', '--with-loki'] # Loki source-to-source translation enabled + pyiface_flag: [''] # Enable the pyiface variant - claw_flag: [''] # Flag to enable CLAW-generated variants + python_f2py_flag: [''] # Enable the f2py variant - pyiface_flag: [''] # Flag to enable Python-interface variant - - python_f2py_flag: [''] # Flag to enable Python variants + claw_flag: [''] ctest_exclude_pattern: ['-scc-hoist-'] # Regex to disable CTest tests include: + # Add pyiface build configuration for double precision, non-MPI, HDF5 only + - arch: gnu/9.4.0 + io_library_flag: '' + prec_flag: '' + build_flags: '--cloudsc-fortran-pyiface=ON --cloudsc-python-f2py=ON' + # Add nvhpc build configurations with serialbox and HDF5 - - arch: github/ubuntu/nvhpc/21.9 + - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' - mpi_flag: '' - prec_flag: '' - gpu_flag: '--with-gpu' - cuda_flag: '--with-cuda' - loki_flag: '--with-loki' - pyiface_flag: '' - python_f2py_flag: '' + build_flags: '--with-gpu --with-loki --with-cuda' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - - arch: github/ubuntu/nvhpc/21.9 + - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '--with-serialbox' - mpi_flag: '' - prec_flag: '' - gpu_flag: '--with-gpu' - cuda_flag: '--with-cuda' - loki_flag: '--with-loki' - pyiface_flag: '' - python_f2py_flag: '' + build_flags: '--with-gpu --with-loki --with-cuda' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - - arch: github/ubuntu/nvhpc/23.5 + + - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' - mpi_flag: '' - prec_flag: '' - gpu_flag: '--with-gpu' - cuda_flag: '--with-cuda' - loki_flag: '--with-loki' - pyiface_flag: '' - python_f2py_flag: '' + build_flags: '--with-gpu --with-loki --with-cuda' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - - arch: github/ubuntu/nvhpc/23.5 + - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '--with-serialbox' - mpi_flag: '' - prec_flag: '' - gpu_flag: '--with-gpu' - cuda_flag: '--with-cuda' - loki_flag: '--with-loki' - pyiface_flag: '' - python_f2py_flag: '' + build_flags: '--with-gpu --with-loki --with-cuda' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - # Add pyiface build configuration for HDF5 only - - arch: github/ubuntu/gnu/9.4.0 - io_library_flag: '' - mpi_flag: '' - prec_flag: '' - gpu_flag: '' - cuda_flag: '' - loki_flag: '' - pyiface_flag: '--cloudsc-fortran-pyiface=ON' - python_f2py_flag: '--cloudsc-python-f2py=ON' # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -119,7 +88,7 @@ jobs: # Install MPI - name: Install MPI via Apt - if: contains( matrix.mpi_flag, 'with-mpi' ) + if: contains( matrix.build_flags, 'with-mpi' ) run: sudo apt-get install libopenmpi-dev # Free up disk space for nvhpc @@ -155,7 +124,7 @@ jobs: - name: Install HDF5 from source if: contains( matrix.arch, 'nvhpc' ) && ! contains( matrix.io_library_flag, 'with-serialbox' ) run: | - source arch/${{ matrix.arch }}/env.sh + source arch/github/ubuntu/${{ matrix.arch }}/env.sh FC=pgf90 .github/scripts/install-hdf5.sh # Install Boost @@ -171,22 +140,15 @@ jobs: - name: Bundle build run: | ./cloudsc-bundle build --retry-verbose \ - --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \ - ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \ - ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} \ - ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} + --arch=arch/github/ubuntu/${{ matrix.arch }} \ + ${{ matrix.prec_flag }} ${{ matrix.io_library_flag }} ${{ matrix.build_flags }} # Verify targets exist - name: Verify targets env: io_library_flag: ${{ matrix.io_library_flag }} prec_flag: ${{ matrix.prec_flag }} - gpu_flag: ${{ matrix.gpu_flag }} - cuda_flag: ${{ matrix.cuda_flag }} - loki_flag: ${{ matrix.loki_flag }} - claw_flag: ${{ matrix.claw_flag }} - pyiface_flag: ${{ matrix.pyiface_flag }} - python_f2py_flag: ${{ matrix.python_f2py_flag }} + build_flags: ${{ matrix.build_flags }} run: .github/scripts/verify-targets.sh # Run ctest From 766b0eb2be3c4836a48d6e170b182db0b35740a7 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 4 Aug 2023 16:31:25 +0200 Subject: [PATCH 061/174] Add arch files for Leonardo --- arch/eurohpc/leonardo/nvhpc/23.1/env.sh | 49 ++++++++++++++++ .../leonardo/nvhpc/23.1/toolchain.cmake | 1 + arch/toolchains/eurohpc-leonardo-nvhpc.cmake | 57 +++++++++++++++++++ 3 files changed, 107 insertions(+) create mode 100644 arch/eurohpc/leonardo/nvhpc/23.1/env.sh create mode 120000 arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake create mode 100644 arch/toolchains/eurohpc-leonardo-nvhpc.cmake diff --git a/arch/eurohpc/leonardo/nvhpc/23.1/env.sh b/arch/eurohpc/leonardo/nvhpc/23.1/env.sh new file mode 100644 index 00000000..f1333dbd --- /dev/null +++ b/arch/eurohpc/leonardo/nvhpc/23.1/env.sh @@ -0,0 +1,49 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# NB: This does currently not support the Serialbox-based build modes +# because the available Boost module does not include the boost_filesystem library + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Load modules +module_load nvhpc/23.1 +module_load openmpi/4.1.4--nvhpc--23.1-cuda-11.8 +module_load cmake/3.24.3 +module_load cuda/11.8 +module_load hdf5/1.12.2--openmpi--4.1.4--nvhpc--23.1 +module_load python/3.10.8--gcc--8.5.0 + +export CC=nvc +export CXX=nvc++ +export F77=nvfortran +export FC=nvfortran +export F90=nvfortran + +# Increase stack size to maximum +ulimit -S -s unlimited + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +# Variable no longer required, make sure it is not set +unset ECBUILD_TOOLCHAIN diff --git a/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake b/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake new file mode 120000 index 00000000..dd30d0f4 --- /dev/null +++ b/arch/eurohpc/leonardo/nvhpc/23.1/toolchain.cmake @@ -0,0 +1 @@ +../../../../toolchains/eurohpc-leonardo-nvhpc.cmake \ No newline at end of file diff --git a/arch/toolchains/eurohpc-leonardo-nvhpc.cmake b/arch/toolchains/eurohpc-leonardo-nvhpc.cmake new file mode 100644 index 00000000..ce8de9da --- /dev/null +++ b/arch/toolchains/eurohpc-leonardo-nvhpc.cmake @@ -0,0 +1,57 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +# Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module +# unless its stored as a cache variable +set( OpenMP_Fortran_FLAGS "-mp -mp=gpu,bind,allcores,numa" CACHE STRING "" ) + +# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to +# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone +# doesn't have any effect here as the module uses FORCE to overwrite the +# existing value) +set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) +set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") + +#################################################################### +# OpenAcc FLAGS +#################################################################### + +# NB: We have to add `-mp` again to avoid undefined symbols during linking +# (smells like an Nvidia bug) +set( OpenACC_Fortran_FLAGS "-acc=gpu -mp=gpu -gpu=cc80,lineinfo,fastmath" CACHE STRING "" ) +# Enable this to get more detailed compiler output +# set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) + +#################################################################### +# COMMON FLAGS +#################################################################### + +set(ECBUILD_Fortran_FLAGS "-fpic") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mframe") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mstack_arrays") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mrecursive") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Ktrap=fp") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Kieee") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Mdaz") + +set( ECBUILD_Fortran_FLAGS_BIT "-O2 -gopt" ) + +set( ECBUILD_C_FLAGS "-O2 -gopt -traceback" ) + +set( ECBUILD_CXX_FLAGS "-O2 -gopt" ) From dbaa29343852dfb64bf2b6dc71485489423a9f96 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Thu, 17 Aug 2023 17:19:39 +0200 Subject: [PATCH 062/174] debug leftover (tnx Balthasar) --- src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index 1e2a7df2..dbbb9dc9 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -78,10 +78,6 @@ PROGRAM DWARF_CLOUDSC ! TODO: Create a global memory state from serialized input data CALL GLOBAL_ATLAS_STATE%LOAD(FSET, NPROMA, NGPTOT, NGPTOTG) -!FIELD = FSET%FIELD("PAP") -!call field%data(tmp3d) -!print *, MINVAL(tmp3d), MAXVAL(tmp3d) - ! Call the driver to perform the parallel loop over our kernel CALL CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY) From b23deb3983cc5f9406ed69971ada129a71462e2d Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 22 Aug 2023 11:58:51 +0200 Subject: [PATCH 063/174] Atlas lets the dwarf do the mpi initialisation --- src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index dbbb9dc9..20755a9e 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -10,7 +10,7 @@ PROGRAM DWARF_CLOUDSC USE PARKIND1, ONLY: JPIM, JPIB -USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK +USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK, MPI_COMM_WORLD USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER USE EC_PMON_MOD, ONLY: EC_PMON @@ -52,8 +52,8 @@ PROGRAM DWARF_CLOUDSC end if ! Initialize MPI environment -CALL ATLAS_LIBRARY%INITIALISE() CALL CLOUDSC_MPI_INIT(NUMOMP) +CALL ATLAS_INIT(COMM=MPI_COMM_WORLD) ! Get total number of grid points (NGPTOT) with which to run the benchmark IF (IARGS >= 2) THEN From 698bdd205c2b3442ceaf44644f1837f782be63f9 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 23 Aug 2023 16:09:01 +0000 Subject: [PATCH 064/174] update authors --- AUTHORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.md b/AUTHORS.md index 1670b2a7..650e42ac 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -2,6 +2,7 @@ - M. Ahlgrimm (ECMWF) - P. Bechtold (ECMWF) +- S. Brdar (ECMWF) - W. Deconinck (ECMWF) - R. Forbes (ECMWF) - C. Jakob (ECMWF) From 3b31fdf8ccb3ea55d7b927f5e59ef6d7cee0d369 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 23 Aug 2023 16:09:35 +0000 Subject: [PATCH 065/174] take newest developments from Atlas --- bundle.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bundle.yml b/bundle.yml index 57230a55..3b2ff9fa 100644 --- a/bundle.yml +++ b/bundle.yml @@ -42,7 +42,7 @@ projects : - eckit : git : https://github.com/ecmwf/eckit - version : 1.24.3 + version : develop optional: true require : ecbuild cmake : > @@ -51,7 +51,7 @@ projects : - fckit : git : https://github.com/ecmwf/fckit - version : 0.9.0 + version : develop optional: true require : ecbuild eckit cmake : > @@ -59,7 +59,7 @@ projects : - atlas : git : https://github.com/ecmwf/atlas - version : master + version : feature/MultiField optional: true require : ecbuild eckit fckit cmake : > From 1a06752979ef861351ef156eeb6be61f643531bb Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 23 Aug 2023 16:20:12 +0000 Subject: [PATCH 066/174] bug fix in Atlas structured when the data array in the first block is less in size than nproma --- src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 | 2 +- src/cloudsc_fortran_atlas/expand_atlas_mod.F90 | 4 ++-- src/cloudsc_fortran_atlas/validate_atlas_mod.F90 | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index a6d11c27..a3018f4e 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -55,7 +55,7 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) FIELD = FSET%FIELD("PEXTRA") FSPACE = FIELD%FUNCTIONSPACE() - NPROMA = FSPACE%BLOCK_SIZE(1) + NPROMA = FSPACE%NPROMA() NLEV = FSPACE%LEVELS() NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index edcf8c1b..16cf8f38 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -51,7 +51,7 @@ subroutine loadvar_atlas(fset, name, nlon, ngptotg) fspace = field%functionspace() nlev = field%levels() - nproma = fspace%block_size(1) + nproma = fspace%nproma() ngptot = fspace%size() nblocks = fspace%nblks() @@ -115,8 +115,8 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) field = fset%field(name) fspace = field%functionspace() nlev = field%levels() - nproma = fspace%block_size(1) ngptot = fspace%size() + nproma = fspace%nproma() nblocks = fspace%nblks() ndim = field%shape(3) - 3 diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 1b1b43f1..3ebcebc0 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -64,7 +64,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) NLEV = FIELD%LEVELS() NGPTOT = FSPACE%SIZE() NBLOCKS = FSPACE%NBLKS() - NPROMA = FSPACE%BLOCK_SIZE(1) + NPROMA = FSPACE%NPROMA() ZMINVAL(1) = +HUGE(ZMINVAL(1)) ZMAX_VAL_ERR(1) = -HUGE(ZMAX_VAL_ERR(1)) From 24b9132391900cd2d616de8fbd0a954e3814f64a Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 23 Aug 2023 16:34:29 +0000 Subject: [PATCH 067/174] bug fix the parallel setup with Atlas structure: 1) number of point per process must be taken from Atlas decomposition; 2) Atlas grid has to get the global number of points --- .../cloudsc_global_atlas_state_mod.F90 | 11 +++++++---- src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 | 7 +++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index ba09384f..71fc1d0e 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -204,7 +204,8 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) ! Load reference input data via serialbox CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET - INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NGPTOT + INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA + INTEGER(KIND=JPIM), INTENT(INOUT) :: NGPTOT INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG TYPE(ATLAS_STRUCTUREDGRID) :: GRID @@ -215,15 +216,17 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) REAL(C_DOUBLE), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD - + TYPE(ATLAS_PARTITIONER) :: PARTITIONER CALL INPUT_INITIALIZE(NAME='input') CALL LOAD_SCALAR('KLON', KLON) CALL LOAD_SCALAR('KLEV', SELF%KLEV) CALL LOAD_SCALAR('KFLDX', SELF%KFLDX) - GRID = ATLAS_REGULARLONLATGRID(NGPTOT, 1) - FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) + GRID = ATLAS_REGULARLONLATGRID(NGPTOTG,1) + PARTITIONER = ATLAS_PARTITIONER("bands") + FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, PARTITIONER, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) + NGPTOT = FSPACE%SIZE() SELF%NBLOCKS = FSPACE%NBLKS() FSET = ATLAS_FIELDSET() diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index 20755a9e..4b2f62d8 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -10,7 +10,7 @@ PROGRAM DWARF_CLOUDSC USE PARKIND1, ONLY: JPIM, JPIB -USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK, MPI_COMM_WORLD +USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK USE CLOUDSC_GLOBAL_ATLAS_STATE_MOD, ONLY: CLOUDSC_GLOBAL_ATLAS_STATE USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER USE EC_PMON_MOD, ONLY: EC_PMON @@ -53,7 +53,7 @@ PROGRAM DWARF_CLOUDSC ! Initialize MPI environment CALL CLOUDSC_MPI_INIT(NUMOMP) -CALL ATLAS_INIT(COMM=MPI_COMM_WORLD) +CALL ATLAS_LIBRARY%INITIALISE() ! Get total number of grid points (NGPTOT) with which to run the benchmark IF (IARGS >= 2) THEN @@ -84,9 +84,8 @@ PROGRAM DWARF_CLOUDSC ! Validate the output against serialized reference data CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, NGPTOT, NGPTOTG) -CALL ATLAS_LIBRARY%FINALISE() - ! Tear down MPI environment +CALL ATLAS_LIBRARY%FINALISE() CALL CLOUDSC_MPI_END() END PROGRAM DWARF_CLOUDSC From b39e918b03d6093339f73b83fcb49fcb444f77c0 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 23 Aug 2023 16:43:49 +0000 Subject: [PATCH 068/174] NGPTOT is controlled by Atlas (tnx Willem) --- src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 | 7 ++++--- .../cloudsc_global_atlas_state_mod.F90 | 7 +++---- src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 | 15 ++++----------- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index a3018f4e..ba9937f4 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -24,19 +24,19 @@ MODULE CLOUDSC_DRIVER_MOD CONTAINS - SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) + SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) ! Driver routine that performans the parallel NPROMA-blocking and ! invokes the CLOUDSC kernel TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET - INTEGER(KIND=JPIM), INTENT(IN) :: NUMOMP, NGPTOT, NGPTOTG, KFLDX + INTEGER(KIND=JPIM), INTENT(IN) :: NUMOMP, NGPTOTG, KFLDX REAL(KIND=JPRB), INTENT(IN) :: PTSPHY ! Physics timestep TYPE(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW) :: FBLOCK TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD - INTEGER(KIND=JPIM) :: NPROMA, NLEV + INTEGER(KIND=JPIM) :: NPROMA, NLEV, NGPTOT INTEGER(KIND=JPIM) :: JKGLO,IBL,ICEND,NGPBLKS @@ -57,6 +57,7 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, KFLDX, PTSPHY) FSPACE = FIELD%FUNCTIONSPACE() NPROMA = FSPACE%NPROMA() NLEV = FSPACE%LEVELS() + NGPTOT = FSPACE%SIZE() NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) 1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGTOT=', i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 71fc1d0e..5ccb174e 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -200,12 +200,11 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) ! Load reference input data via serialbox CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA - INTEGER(KIND=JPIM), INTENT(INOUT) :: NGPTOT INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG TYPE(ATLAS_STRUCTUREDGRID) :: GRID @@ -217,6 +216,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) REAL(C_DOUBLE), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD TYPE(ATLAS_PARTITIONER) :: PARTITIONER + INTEGER(KIND=JPIM) :: NGPTOT CALL INPUT_INITIALIZE(NAME='input') CALL LOAD_SCALAR('KLON', KLON) @@ -296,11 +296,10 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOT, NGPTOTG) CALL INPUT_FINALIZE() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOT, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOTG) ! Validate the correctness of output against reference data CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET - INTEGER(KIND=JPIM), INTENT(IN) :: NGPTOT INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG INTEGER(KIND=JPIM) :: KLON, IVAR diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index 4b2f62d8..dd8470cb 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -26,7 +26,6 @@ PROGRAM DWARF_CLOUDSC INTEGER(KIND=JPIM) :: NUMOMP = 1 ! Number of OpenMP threads for this run INTEGER(KIND=JPIM) :: NGPTOTG = 16384 ! Number of grid points (as read from command line) INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) -INTEGER(KIND=JPIM) :: NGPTOT ! Local number of grid points REAL(c_double), pointer :: tmp3d(:,:,:) type(atlas_fieldset) :: fset @@ -55,18 +54,12 @@ PROGRAM DWARF_CLOUDSC CALL CLOUDSC_MPI_INIT(NUMOMP) CALL ATLAS_LIBRARY%INITIALISE() -! Get total number of grid points (NGPTOT) with which to run the benchmark +! Get total number of grid points (NGPTOTG) with which to run the benchmark IF (IARGS >= 2) THEN CALL GET_COMMAND_ARGUMENT(2, CLARG, LENARG) READ(CLARG(1:LENARG),*) NGPTOTG END IF -! Determine local number of grid points -NGPTOT = (NGPTOTG - 1) / NUMPROC + 1 -if (IRANK == NUMPROC - 1) then - NGPTOT = NGPTOTG - (NUMPROC - 1) * NGPTOT -end if - ! Get the block size (NPROMA) for which to run the benchmark IF (IARGS >= 3) THEN CALL GET_COMMAND_ARGUMENT(3, CLARG, LENARG) @@ -76,13 +69,13 @@ PROGRAM DWARF_CLOUDSC FSET = ATLAS_FIELDSET() ! TODO: Create a global memory state from serialized input data -CALL GLOBAL_ATLAS_STATE%LOAD(FSET, NPROMA, NGPTOT, NGPTOTG) +CALL GLOBAL_ATLAS_STATE%LOAD(FSET, NPROMA, NGPTOTG) ! Call the driver to perform the parallel loop over our kernel -CALL CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOT, NGPTOTG, GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY) +CALL CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY) ! Validate the output against serialized reference data -CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, NGPTOT, NGPTOTG) +CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, NGPTOTG) ! Tear down MPI environment CALL ATLAS_LIBRARY%FINALISE() From 20b25e1cf5ec8677a4558a0a0acdade25c333fd6 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 25 Aug 2023 21:20:19 +0100 Subject: [PATCH 069/174] Remove conflict with CUDA option of eckit --- bundle.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bundle.yml b/bundle.yml index 3b2ff9fa..b4b2204d 100644 --- a/bundle.yml +++ b/bundle.yml @@ -5,6 +5,7 @@ name : cloudsc-bundle version : 1.0.0-develop cmake : > CMAKE_LINK_DEPENDS_NO_SHARED=ON + BUILD_serialbox=OFF BUILD_field_api=OFF BUILD_eckit=OFF BUILD_fckit=OFF @@ -22,7 +23,6 @@ projects : version : v2.5.4/patched optional: true cmake : > - BUILD_serialbox=OFF SERIALBOX_BUILD_SHARED=ON SERIALBOX_ENABLE_FORTRAN=ON SERIALBOX_ENABLE_EXPERIMENTAL_FILESYSTEM=OFF @@ -102,7 +102,8 @@ options : - with-cuda : help : Enable GPU kernel variant based on CUDA-Fortran cmake : > - ENABLE_CUDA=ON + ENABLE_FIELD_API_CUDA=ON + ENABLE_CLOUDSC_CUDA=ON ENABLE_CLOUDSC_GPU_SCC_CUF=ON ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON BUILD_field_api=ON From c4b6c9a00d74b72ba659e30d162f4fc47c70aeea Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 25 Aug 2023 21:20:39 +0100 Subject: [PATCH 070/174] Restore Github workflow after merge --- .github/workflows/build.yml | 71 ++++++++++++++----------------------- 1 file changed, 27 insertions(+), 44 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 473f0e4d..14b2d591 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,19 +35,13 @@ jobs: build_flags: - '' # Plain build without any options - - '--with-gpu --with-loki' # Enable Loki and GPU variants - - '--with-gpu --with-loki --with-mpi' # Enable Loki and GPU variants with MPI + - '--with-gpu --with-loki --with-atlas' # Enable Loki, Atlas, and GPU variants + - '--with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, Atlas, and GPU variants with MPI pyiface_flag: [''] # Enable the pyiface variant - atlas_flag: ['--with-atlas'] # Variant using Atlas-managed fields - - claw_flag: [''] # Flag to enable CLAW-generated variants - python_f2py_flag: [''] # Enable the f2py variant - claw_flag: [''] - ctest_exclude_pattern: ['-scc-hoist-'] # Regex to disable CTest tests include: @@ -55,33 +49,30 @@ jobs: - arch: gnu/9.4.0 io_library_flag: '' prec_flag: '' - gpu_flag: '--with-gpu' - cuda_flag: '--with-cuda' - loki_flag: '--with-loki' - atlas_flag: '--with-atlas' - pyiface_flag: '' - python_f2py_flag: '' - - arch: github/ubuntu/nvhpc/21.9 + build_flags: '--cloudsc-fortran-pyiface=ON --cloudsc-python-f2py=ON' + + # Add nvhpc build configurations with serialbox and HDF5 + - arch: nvhpc/21.9 + nvhpc_version: 21.9 + io_library_flag: '' + build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + - arch: nvhpc/21.9 + nvhpc_version: 21.9 io_library_flag: '--with-serialbox' - mpi_flag: '' - prec_flag: '' - gpu_flag: '--with-gpu' - cuda_flag: '--with-cuda' - loki_flag: '--with-loki' - atlas_flag: '--with-atlas' - pyiface_flag: '' - python_f2py_flag: '' - # Add pyiface build configuration for HDF5 only - - arch: github/ubuntu/gnu/9.4.0 + build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + + - arch: nvhpc/23.5 + nvhpc_version: 23.5 io_library_flag: '' - mpi_flag: '' - prec_flag: '' - gpu_flag: '' - cuda_flag: '' - loki_flag: '' - atlas_flag: '' - pyiface_flag: '--cloudsc-fortran-pyiface=ON' - python_f2py_flag: '--cloudsc-python-f2py=ON' + build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + - arch: nvhpc/23.5 + nvhpc_version: 23.5 + io_library_flag: '--with-serialbox' + build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -147,23 +138,15 @@ jobs: - name: Bundle build run: | ./cloudsc-bundle build --retry-verbose \ - --arch=arch/${{ matrix.arch }} ${{ matrix.prec_flag }} \ - ${{ matrix.mpi_flag }} ${{ matrix.io_library_flag }} ${{ matrix.gpu_flag }} \ - ${{ matrix.claw_flag}} ${{ matrix.loki_flag }} ${{ matrix.cuda_flag }} \ - ${{ matrix.atlas_flag }} ${{ matrix.pyiface_flag }} ${{ matrix.python_f2py_flag }} + --arch=arch/github/ubuntu/${{ matrix.arch }} \ + ${{ matrix.prec_flag }} ${{ matrix.io_library_flag }} ${{ matrix.build_flags }} # Verify targets exist - name: Verify targets env: io_library_flag: ${{ matrix.io_library_flag }} prec_flag: ${{ matrix.prec_flag }} - gpu_flag: ${{ matrix.gpu_flag }} - cuda_flag: ${{ matrix.cuda_flag }} - loki_flag: ${{ matrix.loki_flag }} - atlas_flag: ${{ matrix.atlas_flag }} - claw_flag: ${{ matrix.claw_flag }} - pyiface_flag: ${{ matrix.pyiface_flag }} - python_f2py_flag: ${{ matrix.python_f2py_flag }} + build_flags: ${{ matrix.build_flags }} run: .github/scripts/verify-targets.sh # Run ctest From 1de5e78bbac822169a0a3b52f1b515512c091898 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 25 Aug 2023 21:30:23 +0100 Subject: [PATCH 071/174] Trim down target verification --- .github/scripts/verify-targets.sh | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 28bd2595..b8726c29 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -52,12 +52,9 @@ then fi fi -if [[ "$atlas_flag" == "--with-atlas" ]] +if [[ "$build_flags" == *"--with-atlas"* ]] then targets+=(dwarf-cloudsc-fortran-atlas) - # Atlas builds a number of binaries that end up in bin, too: - targets+=(atlas atlas-atest-mgrids atlas-gaussian-latitudes atlas-grids) - targets+=(atlas-io-list atlas-meshgen fckit) fi if [[ "$build_flags" == *"--cloudsc-fortran-pyiface=ON"* ]] @@ -84,16 +81,4 @@ do fi done -# -# Check there aren't any other binaries -# - -if [[ ${#targets[@]} -lt $(ls build/bin | wc -l) ]] -then - exit_code=1 - echo "::error::Additional targets found in build/bin" - echo "::error::Expected targets: ${targets[@]}" - echo "::error::Found targets: $(ls -1 build/bin | tr '\n' ' ')" -fi - exit $exit_code From 3ef7b5ab4bbf61b173bb8e0b3ecb0596d48dc636 Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Thu, 31 Aug 2023 11:21:20 +0300 Subject: [PATCH 072/174] Manual (or rather semi-automatically generated) HIP versions of cloudsc including arch files for Lumi --- CMakeLists.txt | 2 +- arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh | 51 + .../lumi/cray-gpu/15.0.1/toolchain.cmake | 43 + bundle.yml | 4 + src/CMakeLists.txt | 1 + src/cloudsc_hip/CMakeLists.txt | 168 ++ src/cloudsc_hip/cloudsc/cloudsc_c.cpp | 2632 ++++++++++++++++ src/cloudsc_hip/cloudsc/cloudsc_c.h | 44 + src/cloudsc_hip/cloudsc/cloudsc_c_hoist.cpp | 2651 +++++++++++++++++ src/cloudsc_hip/cloudsc/cloudsc_c_hoist.h | 50 + .../cloudsc/cloudsc_c_k_caching.cpp | 2630 ++++++++++++++++ src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.h | 44 + src/cloudsc_hip/cloudsc/cloudsc_driver.cpp | 607 ++++ src/cloudsc_hip/cloudsc/cloudsc_driver.h | 23 + .../cloudsc/cloudsc_driver_hoist.cpp | 666 +++++ .../cloudsc/cloudsc_driver_hoist.h | 23 + src/cloudsc_hip/cloudsc/cloudsc_validate.cpp | 244 ++ src/cloudsc_hip/cloudsc/cloudsc_validate.h | 17 + src/cloudsc_hip/cloudsc/load_state.cpp | 447 +++ src/cloudsc_hip/cloudsc/load_state.h | 40 + src/cloudsc_hip/cloudsc/mycpu.cpp | 31 + src/cloudsc_hip/cloudsc/mycpu.h | 11 + src/cloudsc_hip/cloudsc/yoecldp_c.h | 145 + src/cloudsc_hip/dwarf_cloudsc.cpp | 44 + 24 files changed, 10617 insertions(+), 1 deletion(-) create mode 100644 arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh create mode 100644 arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake create mode 100644 src/cloudsc_hip/CMakeLists.txt create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_c.cpp create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_c.h create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_c_hoist.cpp create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_c_hoist.h create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.cpp create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.h create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_driver.cpp create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_driver.h create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.h create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_validate.cpp create mode 100644 src/cloudsc_hip/cloudsc/cloudsc_validate.h create mode 100644 src/cloudsc_hip/cloudsc/load_state.cpp create mode 100644 src/cloudsc_hip/cloudsc/load_state.h create mode 100644 src/cloudsc_hip/cloudsc/mycpu.cpp create mode 100644 src/cloudsc_hip/cloudsc/mycpu.h create mode 100644 src/cloudsc_hip/cloudsc/yoecldp_c.h create mode 100644 src/cloudsc_hip/dwarf_cloudsc.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 64403990..9a454395 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,7 +83,7 @@ endif() ecbuild_add_option( FEATURE SERIALBOX DESCRIPTION "Use Serialbox to read input and reference data" REQUIRED_PACKAGES "Serialbox" - CONDITION NOT HAVE_HDF5 + CONDITION NOT HAVE_HDF5 OR HAVE_SERIALBOX DEFAULT OFF ) if( HAVE_SERIALBOX ) list(APPEND CLOUDSC_DEFINITIONS HAVE_SERIALBOX) diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh new file mode 100644 index 00000000..9a66e150 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh @@ -0,0 +1,51 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/23.03 +# module_load partition/G +module_load rocm/5.2.3 +module_load cce/15.0.1 +module_load cray-libsci/22.08.1.1 +module_load cray-mpich/8.1.18 +module_load craype/2.7.20 +module_load craype-accel-amd-gfx90a +module_load buildtools/23.03 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 +module_load Boost/1.81.0-cpeCray-23.03 +module_load partition/G + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake new file mode 100644 index 00000000..4aed4cdc --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake @@ -0,0 +1,43 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI OFF ) +set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +set( ENABLE_OMP ON CACHE STRING "" ) +set( OpenMP_C_FLAGS "-homp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-homp -hnoacc -hlist=aimd -maxrregcount 64" CACHE STRING "" ) + +#################################################################### +# OpenACC FLAGS +#################################################################### + +set( ENABLE_ACC ON CACHE STRING "" ) +set( OpenACC_C_FLAGS "-hacc" ) +set( OpenACC_CXX_FLAGS "-hacc" ) +set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) +set(ECBUILD_Fortran_FLAGS "-hcontiguous") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") + +set(ECBUILD_Fortran_FLAGS_BIT "-O3 -G2 -haggress -DNDEBUG") +# set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") diff --git a/bundle.yml b/bundle.yml index c087d131..77c26c63 100644 --- a/bundle.yml +++ b/bundle.yml @@ -78,6 +78,10 @@ options : ENABLE_CLOUDSC_GPU_SCC_CUF=ON ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON BUILD_field_api=ON + - with-hip : + help: Enable GPU kernel variant based on HIP + cmake: > + ENABLE_CLOUDSC_HIP=ON - with-mpi : help : Enable MPI-parallel kernel diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f6789f08..52f7eb9e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,5 +13,6 @@ add_subdirectory(cloudsc_pyiface) add_subdirectory(cloudsc_python) add_subdirectory(cloudsc_c) add_subdirectory(cloudsc_cuda) +add_subdirectory(cloudsc_hip) add_subdirectory(cloudsc_gpu) add_subdirectory(cloudsc_loki) diff --git a/src/cloudsc_hip/CMakeLists.txt b/src/cloudsc_hip/CMakeLists.txt new file mode 100644 index 00000000..768bee1a --- /dev/null +++ b/src/cloudsc_hip/CMakeLists.txt @@ -0,0 +1,168 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Define this dwarf variant as an ECBuild feature +ecbuild_add_option( FEATURE CLOUDSC_HIP + DESCRIPTION "Build the HIP version CLOUDSC using Serialbox" DEFAULT OFF + CONDITION Serialbox_FOUND +) + +if( HAVE_CLOUDSC_HIP ) + + + if(NOT DEFINED ROCM_PATH) + if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed") + else() + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed") + endif() + endif() + + find_package(hip REQUIRED) + + set(CMAKE_C_COMPILER "${ROCM_PATH}/bin/hipcc") + set(CMAKE_CXX_COMPILER "${ROCM_PATH}/bin/hipcc") + # set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -03 -ffast-math") + + ###### SCC-HIP #### + ecbuild_add_library( + TARGET dwarf-cloudsc-hip-lib + INSTALL_HEADERS LISTED + SOURCES + cloudsc/yoecldp_c.h + cloudsc/load_state.h + cloudsc/load_state.cpp + cloudsc/cloudsc_c.h + cloudsc/cloudsc_c.cpp + cloudsc/cloudsc_driver.h + cloudsc/cloudsc_driver.cpp + cloudsc/cloudsc_validate.h + cloudsc/cloudsc_validate.cpp + cloudsc/mycpu.h + cloudsc/mycpu.cpp + PUBLIC_INCLUDES + $ + $ + PUBLIC_LIBS + hip::device + Serialbox::Serialbox_C + $<${HAVE_OMP}:OpenMP::OpenMP_C> + ) + + target_include_directories(dwarf-cloudsc-hip-lib PUBLIC $ $) + target_link_libraries(dwarf-cloudsc-hip-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) + + target_compile_options(dwarf-cloudsc-hip-lib PRIVATE --offload-arch=gfx90a) + + ecbuild_add_executable( + TARGET dwarf-cloudsc-hip + SOURCES dwarf_cloudsc.cpp + LIBS dwarf-cloudsc-hip-lib + ) + + ## + + ###### SCC-HOIST-HIP #### + ecbuild_add_library( + TARGET dwarf-cloudsc-hip-hoist-lib + INSTALL_HEADERS LISTED + SOURCES + cloudsc/yoecldp_c.h + cloudsc/load_state.h + cloudsc/load_state.cpp + cloudsc/cloudsc_c_hoist.h + cloudsc/cloudsc_c_hoist.cpp + cloudsc/cloudsc_driver_hoist.h + cloudsc/cloudsc_driver_hoist.cpp + cloudsc/cloudsc_validate.h + cloudsc/cloudsc_validate.cpp + cloudsc/mycpu.h + cloudsc/mycpu.cpp + PUBLIC_INCLUDES + $ + $ + PUBLIC_LIBS + hip::device + Serialbox::Serialbox_C + $<${HAVE_OMP}:OpenMP::OpenMP_C> + ) + + target_include_directories(dwarf-cloudsc-hip-hoist-lib PUBLIC $ $) + target_link_libraries(dwarf-cloudsc-hip-hoist-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) + + target_compile_options(dwarf-cloudsc-hip-hoist-lib PRIVATE --offload-arch=gfx90a) + + ecbuild_add_executable( + TARGET dwarf-cloudsc-hip-hoist + SOURCES dwarf_cloudsc.cpp + LIBS dwarf-cloudsc-hip-hoist-lib + ) + + ## + + ###### SCC-K-CACHING-HIP #### + ecbuild_add_library( + TARGET dwarf-cloudsc-hip-k-caching-lib + INSTALL_HEADERS LISTED + SOURCES + cloudsc/yoecldp_c.h + cloudsc/load_state.h + cloudsc/load_state.cpp + cloudsc/cloudsc_c_k_caching.h + cloudsc/cloudsc_c_k_caching.cpp + cloudsc/cloudsc_driver.h + cloudsc/cloudsc_driver.cpp + cloudsc/cloudsc_validate.h + cloudsc/cloudsc_validate.cpp + cloudsc/mycpu.h + cloudsc/mycpu.cpp + PUBLIC_INCLUDES + $ + $ + PUBLIC_LIBS + hip::device + Serialbox::Serialbox_C + $<${HAVE_OMP}:OpenMP::OpenMP_C> + ) + + target_include_directories(dwarf-cloudsc-hip-k-caching-lib PUBLIC $ $) + target_link_libraries(dwarf-cloudsc-hip-k-caching-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) + + target_compile_options(dwarf-cloudsc-hip-k-caching-lib PRIVATE --offload-arch=gfx90a) + + ecbuild_add_executable( + TARGET dwarf-cloudsc-hip-k-caching + SOURCES dwarf_cloudsc.cpp + LIBS dwarf-cloudsc-hip-k-caching-lib + ) + + ## + + + + # Create symlink for the input data + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-hip-serial + COMMAND bin/dwarf-cloudsc-hip + ARGS 1 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + ecbuild_add_test( + TARGET dwarf-cloudsc-hip-omp + COMMAND bin/dwarf-cloudsc-hip + ARGS 4 100 16 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 4 + CONDITION HAVE_OMP + ) + +endif() diff --git a/src/cloudsc_hip/cloudsc/cloudsc_c.cpp b/src/cloudsc_hip/cloudsc/cloudsc_c.cpp new file mode 100644 index 00000000..1bca62cc --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_c.cpp @@ -0,0 +1,2632 @@ +#include "hip/hip_runtime.h" +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#include "cloudsc_c.h" +#include + +__global__ void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2) { + + //------------------------------------------------------------------------------- + // Declare input/output arguments + //------------------------------------------------------------------------------- + + // PLCRIT_AER : critical liquid mmr for rain autoconversion process + // PICRIT_AER : critical liquid mmr for snow autoconversion process + // PRE_LIQ : liq Re + // PRE_ICE : ice Re + // PCCN : liquid cloud condensation nuclei + // PNICE : ice number concentration (cf. CCN) + + const int klev = 137; // Number of levels + + double zlcond1, zlcond2, zlevapl, zlevapi, zrainaut, zsnowaut, zliqcld, zicecld; + double zlevap, zleros; + // condensation and evaporation terms + // autoconversion terms + double zfokoop; + double zfoealfa[klev + 1]; + double zicenuclei; // number concentration of ice nuclei + + double zlicld; + double zacond; + double zaeros; + double zlfinalsum; + double zdqs; + double ztold; + double zqold; + double zdtgdp; + double zrdtgdp; + double ztrpaus; + double zcovpclr; + double zpreclr; + double zcovptot; + double zcovpmax; + double zqpretot; + double zdpevap; + double zdtforc; + double zdtdiab; + double ztp1[klev]; + double zldefr; + double zldifdt; + double zdtgdpf; + double zlcust[5]; + double zacust; + double zmf; + + double zrho; + double ztmp1, ztmp2, ztmp3; + double ztmp4, ztmp5, ztmp6, ztmp7; + double zalfawm; + + // Accumulators of A,B,and C factors for cloud equations + double zsolab; // -ve implicit CC + double zsolac; // linear CC + double zanew; + double zanewm1; + + double zgdp; + + //---for flux calculation + double zda; + double zli[klev], za[klev]; + double zaorig[klev]; // start of scheme value for CC + + int llflag; + int llo1; + + int icall, ik, jk, jl, jm, jn, jo, jlen, is; + + double zdp, zpaphd; + + double zalfa; + // & ZALFACU, ZALFALS + double zalfaw; + double zbeta, zbeta1; + //REAL(KIND=JPRB) :: ZBOTT + double zcfpr; + double zcor; + double zcdmax; + double zmin; + double zlcondlim; + double zdenom; + double zdpmxdt; + double zdpr; + double zdtdp; + double ze; + double zepsec; + double zfac, zfaci, zfacw; + double zgdcp; + double zinew; + double zlcrit; + double zmfdn; + double zprecip; + double zqe; + double zqsat, zqtmst, zrdcp; + double zrhc, zsig, zsigk; + double zwtot; + double zzco, zzdl, zzrh, zzzdt, zqadj; + double zqnew, ztnew; + double zrg_r, zgdph_r, zcons1, zcond, zcons1a; + double zlfinal; + double zmelt; + double zevap; + double zfrz; + double zvpliq, zvpice; + double zadd, zbdd, zcvds, zice0, zdepos; + double zsupsat; + double zfall; + double zre_ice; + double zrldcp; + double zqp1env; + + //---------------------------- + // Arrays for new microphysics + //---------------------------- + int iphase[5]; // marker for water phase of each species + // 0=vapour, 1=liquid, 2=ice + + int imelt[5]; // marks melting linkage for ice categories + // ice->liquid, snow->rain + + int llfall[5]; // marks falling species + // LLFALL=0, cloud cover must > 0 for zqx > 0 + // LLFALL=1, no cloud needed, zqx can evaporate + + int llindex1[5]; // index variable + int llindex3[5 * 5]; // index variable + double zmax; + double zrat; + int iorder[5]; // array for sorting explicit terms + + double zliqfrac[klev]; // cloud liquid water fraction: ql/(ql+qi) + double zicefrac[klev]; // cloud ice water fraction: qi/(ql+qi) + double zqxn[5]; // new values for zqx at time+1 + double zqxfg[5]; // first guess values including precip + double zqxnm1[5]; // new values for zqx at time+1 at level above + double zfluxq[5]; // fluxes convergence of species (needed?) + // Keep the following for possible future total water variance scheme? + //REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + //REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + //REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + //REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + //REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + double zmeltmax; + double zfrzmax; + double zicetot; + + + double zqsmix[klev]; // diagnostic mixed phase saturation + //REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + double zqsliq[klev]; // liquid water saturation + double zqsice[klev]; // ice water saturation + + //REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + //REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + //REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + double zfoeewmt[klev]; + double zfoeew[klev]; + double zfoeeliqt[klev]; + //REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + double zdqsliqdt, zdqsicedt, zdqsmixdt; + double zcorqsliq; + double zcorqsice; + //REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + double zcorqsmix; + double zevaplimliq, zevaplimice, zevaplimmix; + + //------------------------------------------------------- + // SOURCE/SINK array for implicit and explicit terms + //------------------------------------------------------- + // a POSITIVE value entered into the arrays is a... + // Source of this variable + // | + // | Sink of this variable + // | | + // V V + // ZSOLQA(JL,IQa,IQb) = explicit terms + // ZSOLQB(JL,IQa,IQb) = implicit terms + // Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + // a source of NCLDQL and a sink of IQV + // put 'magic' source terms such as PLUDE from + // detrainment into explicit source/sink array diagnognal + // ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + // i.e. A positive value is a sink!????? weird... + //------------------------------------------------------- + + double zsolqa[5 * 5]; // explicit sources and sinks + double zsolqb[5 * 5]; // implicit sources and sinks + // e.g. microphysical pathways between ice variables. + double zqlhs[5 * 5]; // n x n matrix storing the LHS of implicit solver + double zvqx[5]; // fall speeds of three categories + double zexplicit; + double zratio[5], zsinksum[5]; + + // for sedimentation source/sink terms + double zfallsink[5]; + double zfallsrce[5]; + + // for convection detrainment source and subsidence source/sink terms + double zconvsrce[5]; + double zconvsink[5]; + + // for supersaturation source term from previous timestep + double zpsupsatsrce[5]; + + // Numerical fit to wet bulb temperature + double ztw1 = (double) 1329.31; + double ztw2 = (double) 0.0074615; + double ztw3 = (double) 0.85E5; + double ztw4 = (double) 40.637; + double ztw5 = (double) 275.0; + + double zsubsat; // Subsaturation for snow melting term + double ztdmtw0; // Diff between dry-bulb temperature and + // temperature when wet-bulb = 0degC + + // Variables for deposition term + double ztcg; // Temperature dependent function for ice PSD + double zfacx1i, zfacx1s; // PSD correction factor + double zaplusb, zcorrfac, zcorrfac2, zpr02, zterm1, zterm2; // for ice dep + double zcldtopdist; // Distance from cloud top + double zinfactor; // No. of ice nuclei factor for deposition + + // Autoconversion/accretion/riming/evaporation + int iwarmrain; + int ievaprain; + int ievapsnow; + int idepice; + double zrainacc; + double zraincld; + double zsnowrime; + double zsnowcld; + double zesatliq; + double zfallcorr; + double zlambda; + double zevap_denom; + double zcorr2; + double zka; + double zconst; + double ztemp; + + // Rain freezing + int llrainliq; // True if majority of raindrops are liquid (no ice core) + + //---------------------------- + // End: new microphysics + //---------------------------- + + //---------------------- + // SCM budget statistics + //---------------------- + double zrain; + + double zhook_handle; + double ztmpl, ztmpi, ztmpa; + + double zmm, zrr; + double zrg; + + double zzsum, zzratio; + double zepsilon; + + double zcond1, zqp; + + double psum_solqa; + int ibl; + int i_llfall_0; + double zqx[5 * klev]; + double zqx0[5 * klev]; + double zpfplsx[5 * (klev + 1)]; + double zlneg[5 * klev]; + double zqxn2d[5 * klev]; + + jl = threadIdx.x; + ibl = blockIdx.x; + + + //=============================================================================== + //IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) + + //=============================================================================== + // 0.0 Beginning of timestep book-keeping + //---------------------------------------------------------------------- + + + //###################################################################### + // 0. *** SET UP CONSTANTS *** + //###################################################################### + + zepsilon = (double) 100.*DBL_EPSILON; + + // --------------------------------------------------------------------- + // Set version of warm-rain autoconversion/accretion + // IWARMRAIN = 1 ! Sundquist + // IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + // --------------------------------------------------------------------- + iwarmrain = 2; + // --------------------------------------------------------------------- + // Set version of rain evaporation + // IEVAPRAIN = 1 ! Sundquist + // IEVAPRAIN = 2 ! Abel and Boutle (2013) + // --------------------------------------------------------------------- + ievaprain = 2; + // --------------------------------------------------------------------- + // Set version of snow evaporation + // IEVAPSNOW = 1 ! Sundquist + // IEVAPSNOW = 2 ! New + // --------------------------------------------------------------------- + ievapsnow = 1; + // --------------------------------------------------------------------- + // Set version of ice deposition + // IDEPICE = 1 ! Rotstayn (2001) + // IDEPICE = 2 ! New + // --------------------------------------------------------------------- + idepice = 1; + + // --------------------- + // Some simple constants + // --------------------- + zqtmst = (double) 1.0 / ptsphy; + zgdcp = rg / rcpd; + zrdcp = rd / rcpd; + zcons1a = rcpd / (rlmlt*rg*(*yrecldp).rtaumel); + zepsec = (double) 1.E-14; + zrg_r = (double) 1.0 / rg; + zrldcp = (double) 1.0 / (ralsdcp - ralvdcp); + + // Note: Defined in module/yoecldp.F90 + // NCLDQL=1 ! liquid cloud water + // NCLDQI=2 ! ice cloud water + // NCLDQR=3 ! rain water + // NCLDQS=4 ! snow + // NCLDQV=5 ! vapour + + // ----------------------------------------------- + // Define species phase, 0=vapour, 1=liquid, 2=ice + // ----------------------------------------------- + iphase[4] = 0; + iphase[0] = 1; + iphase[2] = 1; + iphase[1] = 2; + iphase[3] = 2; + + // --------------------------------------------------- + // Set up melting/freezing index, + // if an ice category melts/freezes, where does it go? + // --------------------------------------------------- + imelt[4] = -99; + imelt[0] = 2; + imelt[2] = 4; + imelt[1] = 3; + imelt[3] = 3; + + // ----------------------------------------------- + // INITIALIZATION OF OUTPUT TENDENCIES + // ----------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + tendency_loc_a[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + } + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))] = (double) 0.0; + } + } + + //-- These were uninitialized : meaningful only when we compare error differences + for (jk = 0; jk <= klev + -1; jk += 1) { + pcovptot[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + tendency_loc_cld[jl + klon*(jk + klev*(4 + 5*(ibl)))] = (double) 0.0 + ; + } + + // ------------------------- + // set up fall speeds in m/s + // ------------------------- + zvqx[4] = (double) 0.0; + zvqx[0] = (double) 0.0; + zvqx[1] = (*yrecldp).rvice; + zvqx[2] = (*yrecldp).rvrain; + zvqx[3] = (*yrecldp).rvsnow; + for (i_llfall_0 = 0; i_llfall_0 <= 5 + -1; i_llfall_0 += 1) { + llfall[i_llfall_0] = false; + } + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (zvqx[jm] > (double) 0.0) { + llfall[jm] = true; + } + // falling species + } + // Set LLFALL to false for ice (but ice still sediments!) + // Need to rationalise this at some point + llfall[1] = false; + + + //###################################################################### + // 1. *** INITIAL VALUES FOR VARIABLES *** + //###################################################################### + + + // ---------------------- + // non CLV initialization + // ---------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + ztp1[jk] = pt[jl + klon*(jk + klev*(ibl))] + ptsphy*tendency_tmp_t[ + jl + klon*(jk + klev*(ibl))]; + zqx[jk + klev*(4)] = pq[jl + klon*(jk + klev*(ibl))] + + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*(ibl))]; + zqx0[jk + klev*(4)] = pq[jl + klon*(jk + klev*(ibl))] + + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*(ibl))]; + za[jk] = pa[jl + klon*(jk + klev*(ibl))] + ptsphy*tendency_tmp_a[jl + + klon*(jk + klev*(ibl))]; + zaorig[jk] = pa[jl + klon*(jk + klev*(ibl))] + ptsphy*tendency_tmp_a[ + jl + klon*(jk + klev*(ibl))]; + } + + // ------------------------------------- + // initialization for CLV family + // ------------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqx[jk + klev*jm] = pclv[jl + klon*(jk + klev*(jm + 5*(ibl)))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))]; + zqx0[jk + klev*jm] = pclv[jl + klon*(jk + klev*(jm + 5*(ibl)))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))]; + } + } + + //------------- + // zero arrays + //------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + zpfplsx[jk + (klev + 1)*jm] = (double) 0.0; // precip fluxes + } + } + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqxn2d[jk + klev*jm] = (double) 0.0; // end of timestep values in 2D + zlneg[jk + klev*jm] = (double) 0.0; // negative input check + } + } + + prainfrac_toprfz[jl + klon*(ibl)] = (double) 0.0; // rain fraction at top of refreezing layer + llrainliq = true; // Assume all raindrops are liquid initially + + // ---------------------------------------------------- + // Tidy up very small cloud cover or total cloud water + // ---------------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + if (zqx[jk + klev*(0)] + zqx[jk + klev*(1)] < (*yrecldp).rlmin || za[jk] + < (*yrecldp).ramin) { + + // Evaporate small cloud liquid water amounts + zlneg[jk + klev*(0)] = zlneg[jk + klev*(0)] + zqx[jk + klev*(0)]; + zqadj = zqx[jk + klev*(0)]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = + tendency_loc_q[jl + klon*(jk + klev*(ibl))] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralvdcp*zqadj; + zqx[jk + klev*(4)] = zqx[jk + klev*(4)] + zqx[jk + klev*(0)]; + zqx[jk + klev*(0)] = (double) 0.0; + + // Evaporate small cloud ice water amounts + zlneg[jk + klev*(1)] = zlneg[jk + klev*(1)] + zqx[jk + klev*(1)]; + zqadj = zqx[jk + klev*(1)]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = + tendency_loc_q[jl + klon*(jk + klev*(ibl))] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralsdcp*zqadj; + zqx[jk + klev*(4)] = zqx[jk + klev*(4)] + zqx[jk + klev*(1)]; + zqx[jk + klev*(1)] = (double) 0.0; + + // Set cloud cover to zero + za[jk] = (double) 0.0; + + } + } + + // --------------------------------- + // Tidy up small CLV variables + // --------------------------------- + //DIR$ IVDEP + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + //DIR$ IVDEP + for (jk = 0; jk <= klev + -1; jk += 1) { + //DIR$ IVDEP + if (zqx[jk + klev*jm] < (*yrecldp).rlmin) { + zlneg[jk + klev*jm] = zlneg[jk + klev*jm] + zqx[jk + klev*jm]; + zqadj = zqx[jk + klev*jm]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = + tendency_loc_q[jl + klon*(jk + klev*(ibl))] + zqadj; + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralvdcp*zqadj; + } + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralsdcp*zqadj; + } + zqx[jk + klev*(4)] = zqx[jk + klev*(4)] + zqx[jk + klev*jm]; + zqx[jk + klev*jm] = (double) 0.0; + } + } + } + + + // ------------------------------ + // Define saturation values + // ------------------------------ + for (jk = 0; jk <= klev + -1; jk += 1) { + //---------------------------------------- + // old *diagnostic* mixed phase saturation + //---------------------------------------- + zfoealfa[jk] = ((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))); + zfoeewmt[jk] = + fmin(((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)) + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))) / pap[jl + klon*(jk + klev*(ibl))], (double) 0.5); + zqsmix[jk] = zfoeewmt[jk]; + zqsmix[jk] = zqsmix[jk] / ((double) 1.0 - retv*zqsmix[jk]); + + //--------------------------------------------- + // ice saturation T<273K + // liquid water saturation for T>273K + //--------------------------------------------- + zalfa = ((double)(fmax(0.0, copysign(1.0, ztp1[jk] - rtt)))); + zfoeew[jk] = fmin((zalfa*((double)(r2es*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))) + ((double) 1.0 - zalfa)*((double)(r2es*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))) / pap[jl + + klon*(jk + klev*(ibl))], (double) 0.5); + zfoeew[jk] = fmin((double) 0.5, zfoeew[jk]); + zqsice[jk] = zfoeew[jk] / ((double) 1.0 - retv*zfoeew[jk]); + + //---------------------------------- + // liquid water saturation + //---------------------------------- + zfoeeliqt[jk] = + fmin(((double)(r2es*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))) / pap[jl + klon*(jk + klev*(ibl))], (double) 0.5); + zqsliq[jk] = zfoeeliqt[jk]; + zqsliq[jk] = zqsliq[jk] / ((double) 1.0 - retv*zqsliq[jk]); + + // !---------------------------------- + // ! ice water saturation + // !---------------------------------- + // ZFOEEICET(JL,JK)=MIN(((double)(r2es*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + // ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + // ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + + } + + for (jk = 0; jk <= klev + -1; jk += 1) { + + + //------------------------------------------ + // Ensure cloud fraction is between 0 and 1 + //------------------------------------------ + za[jk] = fmax((double) 0.0, fmin((double) 1.0, za[jk])); + + //------------------------------------------------------------------- + // Calculate liq/ice fractions (no longer a diagnostic relationship) + //------------------------------------------------------------------- + zli[jk] = zqx[jk + klev*(0)] + zqx[jk + klev*(1)]; + if (zli[jk] > (*yrecldp).rlmin) { + zliqfrac[jk] = zqx[jk + klev*(0)] / zli[jk]; + zicefrac[jk] = (double) 1.0 - zliqfrac[jk]; + } else { + zliqfrac[jk] = (double) 0.0; + zicefrac[jk] = (double) 0.0; + } + + } + + //###################################################################### + // 2. *** CONSTANTS AND PARAMETERS *** + //###################################################################### + // Calculate L in updrafts of bl-clouds + // Specify QS, P/PS for tropopause (for c2) + // And initialize variables + //------------------------------------------ + + //--------------------------------- + // Find tropopause level (ZTRPAUS) + //--------------------------------- + ztrpaus = (double) 0.1; + zpaphd = (double) 1.0 / paph[jl + klon*(klev + (klev + 1)*(ibl))]; + for (jk = 0; jk <= klev - 1 + -1; jk += 1) { + zsig = pap[jl + klon*(jk + klev*(ibl))]*zpaphd; + if (zsig > (double) 0.1 && zsig < (double) 0.4 && ztp1[jk] > ztp1[1 + jk]) { + ztrpaus = zsig; + } + } + + //----------------------------- + // Reset single level variables + //----------------------------- + + zanewm1 = (double) 0.0; + zda = (double) 0.0; + zcovpclr = (double) 0.0; + zcovpmax = (double) 0.0; + zcovptot = (double) 0.0; + zcldtopdist = (double) 0.0; + + //###################################################################### + // 3. *** PHYSICS *** + //###################################################################### + + + //---------------------------------------------------------------------- + // START OF VERTICAL LOOP + //---------------------------------------------------------------------- + + for (jk = -1 + (*yrecldp).ncldtop; jk <= klev + -1; jk += 1) { + + //---------------------------------------------------------------------- + // 3.0 INITIALIZE VARIABLES + //---------------------------------------------------------------------- + + //--------------------------------- + // First guess microphysics + //--------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxfg[jm] = zqx[jk + klev*jm]; + } + + //--------------------------------- + // Set KLON arrays to zero + //--------------------------------- + + zlicld = (double) 0.0; + zrainaut = (double) 0.0; // currently needed for diags + zrainacc = (double) 0.0; // currently needed for diags + zsnowaut = (double) 0.0; // needed + zldefr = (double) 0.0; + zacust = (double) 0.0; // set later when needed + zqpretot = (double) 0.0; + zlfinalsum = (double) 0.0; + + // Required for first guess call + zlcond1 = (double) 0.0; + zlcond2 = (double) 0.0; + zsupsat = (double) 0.0; + zlevapl = (double) 0.0; + zlevapi = (double) 0.0; + + //------------------------------------- + // solvers for cloud fraction + //------------------------------------- + zsolab = (double) 0.0; + zsolac = (double) 0.0; + + zicetot = (double) 0.0; + + //------------------------------------------ + // reset matrix so missing pathways are set + //------------------------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsolqb[jn + 5*jm] = (double) 0.0; + zsolqa[jn + 5*jm] = (double) 0.0; + } + } + + //---------------------------------- + // reset new microphysics variables + //---------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zfallsrce[jm] = (double) 0.0; + zfallsink[jm] = (double) 0.0; + zconvsrce[jm] = (double) 0.0; + zconvsink[jm] = (double) 0.0; + zpsupsatsrce[jm] = (double) 0.0; + zratio[jm] = (double) 0.0; + } + + + //------------------------- + // derived variables needed + //------------------------- + + zdp = paph[jl + klon*(1 + jk + (klev + 1)*(ibl))] - paph[jl + + klon*(jk + (klev + 1)*(ibl))]; // dp + zgdp = rg / zdp; // g/dp + zrho = pap[jl + klon*(jk + klev*(ibl))] / (rd*ztp1[jk]); // p/RT air density + + zdtgdp = ptsphy*zgdp; // dt g/dp + zrdtgdp = zdp*((double) 1.0 / (ptsphy*rg)); // 1/(dt g/dp) + + if (jk + 1 > 1) { + zdtgdpf = ptsphy*rg / (pap[jl + klon*(jk + klev*(ibl))] - pap[jl + + klon*(-1 + jk + klev*(ibl))]); + } + + //------------------------------------ + // Calculate dqs/dT correction factor + //------------------------------------ + // Reminder: RETV=RV/RD-1 + + // liquid + zfacw = r5les / (pow((ztp1[jk] - r4les), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeeliqt[jk]); + zdqsliqdt = zfacw*zcor*zqsliq[jk]; + zcorqsliq = (double) 1.0 + ralvdcp*zdqsliqdt; + + // ice + zfaci = r5ies / (pow((ztp1[jk] - r4ies), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeew[jk]); + zdqsicedt = zfaci*zcor*zqsice[jk]; + zcorqsice = (double) 1.0 + ralsdcp*zdqsicedt; + + // diagnostic mixed + zalfaw = zfoealfa[jk]; + zalfawm = zalfaw; + zfac = zalfaw*zfacw + ((double) 1.0 - zalfaw)*zfaci; + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeewmt[jk]); + zdqsmixdt = zfac*zcor*zqsmix[jk]; + zcorqsmix = (double) 1.0 + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zdqsmixdt; + + // evaporation/sublimation limits + zevaplimmix = + fmax((zqsmix[jk] - zqx[jk + klev*(4)]) / zcorqsmix, (double) 0.0); + zevaplimliq = + fmax((zqsliq[jk] - zqx[jk + klev*(4)]) / zcorqsliq, (double) 0.0); + zevaplimice = + fmax((zqsice[jk] - zqx[jk + klev*(4)]) / zcorqsice, (double) 0.0); + + //-------------------------------- + // in-cloud consensate amount + //-------------------------------- + ztmpa = (double) 1.0 / fmax(za[jk], zepsec); + zliqcld = zqx[jk + klev*(0)]*ztmpa; + zicecld = zqx[jk + klev*(1)]*ztmpa; + zlicld = zliqcld + zicecld; + + + //------------------------------------------------ + // Evaporate very small amounts of liquid and ice + //------------------------------------------------ + + if (zqx[jk + klev*(0)] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(0)] = zqx[jk + klev*(0)]; + zsolqa[0 + 5*(4)] = -zqx[jk + klev*(0)]; + } + + if (zqx[jk + klev*(1)] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(1)] = zqx[jk + klev*(1)]; + zsolqa[1 + 5*(4)] = -zqx[jk + klev*(1)]; + } + + + //--------------------------------------------------------------------- + // 3.1 ICE SUPERSATURATION ADJUSTMENT + //--------------------------------------------------------------------- + // Note that the supersaturation adjustment is made with respect to + // liquid saturation: when T>0C + // ice saturation: when T<0C + // with an adjustment made to allow for ice + // supersaturation in the clear sky + // Note also that the KOOP factor automatically clips the supersaturation + // to a maximum set by the liquid water saturation mixing ratio + // important for temperatures near to but below 0C + //----------------------------------------------------------------------- + + //DIR$ NOFUSION + + //----------------------------------- + // 3.1.1 Supersaturation limit (from Koop) + //----------------------------------- + // Needs to be set for all temperatures + zfokoop = ((double)(fmin(rkoop1 - rkoop2*ztp1[jk], (double)(r2es*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))*1.0/(double)(r2es*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies)))))); + + if (ztp1[jk] >= rtt || (*yrecldp).nssopt == 0) { + zfac = (double) 1.0; + zfaci = (double) 1.0; + } else { + zfac = za[jk] + zfokoop*((double) 1.0 - za[jk]); + zfaci = ptsphy / (*yrecldp).rkooptau; + } + + //------------------------------------------------------------------- + // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + // correction factor + // [#Note: QSICE or QSLIQ] + //------------------------------------------------------------------- + + // Calculate supersaturation to add to cloud + if (za[jk] > (double) 1.0 - (*yrecldp).ramin) { + zsupsat = + fmax((zqx[jk + klev*(4)] - zfac*zqsice[jk]) / zcorqsice, (double) 0.0); + } else { + // Calculate environmental humidity supersaturation + zqp1env = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / fmax((double) 1.0 - + za[jk], zepsilon); + //& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat = fmax(((double) 1.0 - za[jk])*(zqp1env - zfac*zqsice[jk]) / zcorqsice, + (double) 0.0); + } + + //------------------------------------------------------------------- + // Here the supersaturation is turned into liquid water + // However, if the temperature is below the threshold for homogeneous + // freezing then the supersaturation is turned instantly to ice. + //-------------------------------------------------------------------- + + if (zsupsat > zepsec) { + + if (ztp1[jk] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zsupsat; + // Include liquid in first guess + zqxfg[0] = zqxfg[0] + zsupsat; + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zsupsat; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + zsupsat; + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk])*zfaci; + + } + + //------------------------------------------------------- + // 3.1.3 Include supersaturation from previous timestep + // (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + //------------------------------------------------------- + if (psupsat[jl + klon*(jk + klev*(ibl))] > zepsec) { + if (ztp1[jk] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(0)] = + zsolqa[0 + 5*(0)] + psupsat[jl + klon*(jk + klev*(ibl))]; + zpsupsatsrce[0] = psupsat[jl + klon*(jk + klev*(ibl))]; + // Add liquid to first guess for deposition term + zqxfg[0] = zqxfg[0] + psupsat[jl + klon*(jk + klev*(ibl))]; + // Store cloud budget diagnostics if required + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(1)] = + zsolqa[1 + 5*(1)] + psupsat[jl + klon*(jk + klev*(ibl))]; + zpsupsatsrce[1] = psupsat[jl + klon*(jk + klev*(ibl))]; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + psupsat[jl + klon*(jk + klev*(ibl))]; + // Store cloud budget diagnostics if required + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk])*zfaci; + // Store cloud budget diagnostics if required + } + + // on JL + + //--------------------------------------------------------------------- + // 3.2 DETRAINMENT FROM CONVECTION + //--------------------------------------------------------------------- + // * Diagnostic T-ice/liq split retained for convection + // Note: This link is now flexible and a future convection + // scheme can detrain explicit seperate budgets of: + // cloud water, ice, rain and snow + // * There is no (1-ZA) multiplier term on the cloud detrainment + // term, since is now written in mass-flux terms + // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + //--------------------------------------------------------------------- + if (jk + 1 < klev && jk + 1 >= (*yrecldp).ncldtop) { + + + plude[jl + klon*(jk + klev*(ibl))] = + plude[jl + klon*(jk + klev*(ibl))]*zdtgdp; + + if (/*ldcum[jl + klon*(ibl)] &&*/ plude[jl + klon*(jk + klev*(ibl + ))] > (*yrecldp).rlmin && plu[jl + klon*(1 + jk + klev*(ibl))] > + zepsec) { + + zsolac = zsolac + plude[jl + klon*(jk + klev*(ibl))] / plu[jl + + klon*(1 + jk + klev*(ibl))]; + // *diagnostic temperature split* + zalfaw = zfoealfa[jk]; + zconvsrce[0] = zalfaw*plude[jl + klon*(jk + klev*(ibl))]; + zconvsrce[1] = + ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*(ibl))]; + zsolqa[0 + 5*(0)] = zsolqa[0 + 5*(0)] + zconvsrce[0]; + zsolqa[1 + 5*(1)] = zsolqa[1 + 5*(1)] + zconvsrce[1]; + + } else { + + plude[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + + } + // *convective snow detrainment source + //if (ldcum[jl + klon*(ibl)]) { + zsolqa[3 + 5*(3)] = zsolqa[3 + 5*(3)] + psnde[jl + + klon*(jk + klev*(ibl))]*zdtgdp; + //} + + + } + // JK (*yrecldp).ncldtop) { + + zmf = fmax((double) 0.0, (pmfu[jl + klon*(jk + klev*(ibl))] + pmfd[-1 + + jl + klon*(jk + klev*(ibl))])*zdtgdp); + zacust = zmf*zanewm1; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlcust[jm] = zmf*zqxnm1[jm]; + // record total flux for enthalpy budget: + zconvsrce[jm] = zconvsrce[jm] + zlcust[jm]; + } + } + + // Now have to work out how much liquid evaporates at arrival point + // since there is no prognostic memory for in-cloud humidity, i.e. + // we always assume cloud is saturated. + + zdtdp = zrdcp*(double) 0.5*(ztp1[-1 + jk] + ztp1[jk]) / paph[jl + klon*(jk + + (klev + 1)*(ibl))]; + zdtforc = zdtdp*(pap[jl + klon*(jk + klev*(ibl))] - pap[jl + + klon*(-1 + jk + klev*(ibl))]); + //[#Note: Diagnostic mixed phase should be replaced below] + zdqs = zanewm1*zdtforc*zdqsmixdt; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlfinal = fmax((double) 0.0, zlcust[jm] - zdqs); //lim to zero + // no supersaturation allowed incloud ---V + zevap = fmin((zlcust[jm] - zlfinal), zevaplimmix); + // ZEVAP=0.0_JPRB + zlfinal = zlcust[jm] - zevap; + zlfinalsum = zlfinalsum + zlfinal; // sum + + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zlcust[jm]; // whole sum + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zevap; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zevap; + } + } + + // Reset the cloud contribution if no cloud water survives to this level: + if (zlfinalsum < zepsec) { + zacust = (double) 0.0; + } + zsolac = zsolac + zacust; + + } + // on JK>NCLDTOP + + //--------------------------------------------------------------------- + // Subsidence sink of cloud to the layer below + // (Implicit - re. CFL limit on convective mass flux) + //--------------------------------------------------------------------- + + + if (jk + 1 < klev) { + + zmfdn = fmax((double) 0.0, (pmfu[jl + klon*(1 + jk + klev*(ibl))] + + pmfd[jl + klon*(1 + jk + klev*(ibl))])*zdtgdp); + + zsolab = zsolab + zmfdn; + zsolqb[0 + 5*(0)] = zsolqb[0 + 5*(0)] + zmfdn; + zsolqb[1 + 5*(1)] = zsolqb[1 + 5*(1)] + zmfdn; + + // Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[0] = zmfdn; + zconvsink[1] = zmfdn; + + } + + + //---------------------------------------------------------------------- + // 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + //---------------------------------------------------------------------- + // NOTE: In default tiedtke scheme this process decreases the cloud + // area but leaves the specific cloud water content + // within clouds unchanged + //---------------------------------------------------------------------- + + // ------------------------------ + // Define turbulent erosion rate + // ------------------------------ + zldifdt = (*yrecldp).rcldiff*ptsphy; //original version + //Increase by factor of 5 for convective points + if (ktype[jl + klon*(ibl)] > 0 && plude[jl + klon*(jk + klev*( + ibl))] > zepsec) { + zldifdt = (*yrecldp).rcldiff_convi*zldifdt; + } + + // At the moment, works on mixed RH profile and partitioned ice/liq fraction + // so that it is similar to previous scheme + // Should apply RHw for liquid cloud and RHi for ice cloud separately + if (zli[jk] > zepsec) { + // Calculate environmental humidity + // ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + // & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + // ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt*fmax(zqsmix[jk] - zqx[jk + klev*(4)], (double) 0.0); + zleros = za[jk]*ze; + zleros = fmin(zleros, zevaplimmix); + zleros = fmin(zleros, zli[jk]); + zaeros = zleros / zlicld; //if linear term + + // Erosion is -ve LINEAR in L,A + zsolac = zsolac - zaeros; //linear + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jk]*zleros; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jk]*zleros; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jk]*zleros; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jk]*zleros; + + } + + //---------------------------------------------------------------------- + // 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + //---------------------------------------------------------------------- + // calculate dqs/dt + // Note: For the separate prognostic Qi and Ql, one would ideally use + // Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + // forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + // These would then instantaneous freeze if T<-38C or lead to ice growth + // by deposition in warmer mixed phase clouds. However, since we do + // not have a separate prognostic equation for in-cloud humidity or a + // statistical scheme approach in place, the depositional growth of ice + // in the mixed phase can not be modelled and we resort to supersaturation + // wrt ice instanteously converting to ice over one timestep + // (see Tompkins et al. QJRMS 2007 for details) + // Thus for the initial implementation the diagnostic mixed phase is + // retained for the moment, and the level of approximation noted. + //---------------------------------------------------------------------- + + zdtdp = zrdcp*ztp1[jk] / pap[jl + klon*(jk + klev*(ibl))]; + zdpmxdt = zdp*zqtmst; + zmfdn = (double) 0.0; + if (jk + 1 < klev) { + zmfdn = pmfu[jl + klon*(1 + jk + klev*(ibl))] + pmfd[jl + klon*(1 + + jk + klev*(ibl))]; + } + zwtot = pvervel[jl + klon*(jk + klev*(ibl))] + (double) 0.5*rg*(pmfu[ + jl + klon*(jk + klev*(ibl))] + pmfd[jl + klon*(jk + klev*(ibl))] + + zmfdn); + zwtot = fmin(zdpmxdt, fmax(-zdpmxdt, zwtot)); + zzzdt = phrsw[jl + klon*(jk + klev*(ibl))] + phrlw[jl + klon*(jk + + klev*(ibl))]; + zdtdiab = fmin(zdpmxdt*zdtdp, fmax(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ralfdcp*zldefr; + // Note: ZLDEFR should be set to the difference between the mixed phase functions + // in the convection and cloud scheme, but this is not calculated, so is zero and + // the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab; + zqold = zqsmix[jk]; + ztold = ztp1[jk]; + ztp1[jk] = ztp1[jk] + zdtforc; + ztp1[jk] = fmax(ztp1[jk], (double) 160.0); + llflag = true; + + // Formerly a call to CUADJTQ(..., ICALL=5) + zqp = (double) 1.0 / pap[jl + klon*(jk + klev*(ibl))]; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)) + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond = (zqsmix[jk] - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jk] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)*(1.0/pow(ztp1[jk] - r4ies, 2))))); + ztp1[jk] = ztp1[jk] + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond; + zqsmix[jk] = zqsmix[jk] - zcond; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)) + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond1 = (zqsmix[jk] - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jk] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)*(1.0/pow(ztp1[jk] - r4ies, 2))))); + ztp1[jk] = ztp1[jk] + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond1; + zqsmix[jk] = zqsmix[jk] - zcond1; + + zdqs = zqsmix[jk] - zqold; + zqsmix[jk] = zqold; + ztp1[jk] = ztold; + + //---------------------------------------------------------------------- + // 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + // ---------------------------------------------------------------------- + // Erosion term is LINEAR in L + // Changed to be uniform distribution in cloud region + + + // Previous function based on DELTA DISTRIBUTION in cloud: + if (zdqs > (double) 0.0) { + // If subsidence evaporation term is turned off, then need to use updated + // liquid and cloud here? + // ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jk]*fmin(zdqs, zlicld); + zlevap = fmin(zlevap, zevaplimmix); + zlevap = fmin(zlevap, fmax(zqsmix[jk] - zqx[jk + klev*(4)], (double) 0.0)); + + // For first guess call + zlevapl = zliqfrac[jk]*zlevap; + zlevapi = zicefrac[jk]*zlevap; + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jk]*zlevap; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jk]*zlevap; + + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jk]*zlevap; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jk]*zlevap; + + } + + + //---------------------------------------------------------------------- + // 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + //---------------------------------------------------------------------- + // (1) Increase of cloud water in existing clouds + if (za[jk] > zepsec && zdqs <= -(*yrecldp).rlmin) { + + zlcond1 = fmax(-zdqs, (double) 0.0); //new limiter + + //old limiter (significantly improves upper tropospheric humidity rms) + if (za[jk] > (double) 0.99) { + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsmix[jk]); + zcdmax = (zqx[jk + klev*(4)] - zqsmix[jk]) / ((double) 1.0 + + zcor*zqsmix[jk]*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jk] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)*(1.0/pow(ztp1[jk] - r4ies, 2))))); + } else { + zcdmax = (zqx[jk + klev*(4)] - za[jk]*zqsmix[jk]) / za[jk]; + } + zlcond1 = fmax(fmin(zlcond1, zcdmax), (double) 0.0); + // end old limiter + + zlcond1 = za[jk]*zlcond1; + if (zlcond1 < (*yrecldp).rlmin) { + zlcond1 = (double) 0.0; + } + + //------------------------------------------------------------------------- + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------- + if (ztp1[jk] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond1; + zqxfg[0] = zqxfg[0] + zlcond1; + } else { + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond1; + zqxfg[1] = zqxfg[1] + zlcond1; + } + } + + // (2) Generation of new clouds (da/dt>0) + + + if (zdqs <= -(*yrecldp).rlmin && za[jk] < (double) 1.0 - zepsec) { + + //--------------------------- + // Critical relative humidity + //--------------------------- + zrhc = (*yrecldp).ramid; + zsigk = pap[jl + klon*(jk + klev*(ibl))] / paph[jl + klon*(klev + + (klev + 1)*(ibl))]; + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + if (zsigk > (double) 0.8) { + zrhc = (*yrecldp).ramid + ((double) 1.0 - (*yrecldp).ramid)*(pow(((zsigk - + (double) 0.8) / (double) 0.2), 2)); + } + + // Commented out for CY37R1 to reduce humidity in high trop and strat + // ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + // ZBOTT=ZTRPAUS(JL)+0.2_JPRB + // IF(ZSIGK < ZBOTT) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + // ENDIF + + //--------------------------- + // Supersaturation options + //--------------------------- + if ((*yrecldp).nssopt == 0) { + // No scheme + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / fmax(zepsec, (double) 1.0 + - za[jk]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 1) { + // Tompkins + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / fmax(zepsec, (double) 1.0 + - za[jk]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 2) { + // Lohmann and Karcher + zqe = zqx[jk + klev*(4)]; + } else if ((*yrecldp).nssopt == 3) { + // Gierens + zqe = zqx[jk + klev*(4)] + zli[jk]; + } + + if (ztp1[jk] >= rtt || (*yrecldp).nssopt == 0) { + // No ice supersaturation allowed + zfac = (double) 1.0; + } else { + // Ice supersaturation + zfac = zfokoop; + } + + if (zqe >= zrhc*zqsice[jk]*zfac && zqe < zqsice[jk]*zfac) { + // note: not **2 on 1-a term if ZQE is used. + // Added correction term ZFAC to numerator 15/03/2010 + zacond = -((double) 1.0 - za[jk])*zfac*zdqs / fmax((double) + 2.0*(zfac*zqsice[jk] - zqe), zepsec); + + zacond = fmin(zacond, (double) 1.0 - za[jk]); //PUT THE LIMITER BACK + + // Linear term: + // Added correction term ZFAC 15/03/2010 + zlcond2 = -zfac*zdqs*(double) 0.5*zacond; //mine linear + + // new limiter formulation + zzdl = + (double) 2.0*(zfac*zqsice[jk] - zqe) / fmax(zepsec, (double) 1.0 - za[jk]); + // Added correction term ZFAC 15/03/2010 + if (zfac*zdqs < -zzdl) { + // ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = (za[jk] - (double) 1.0)*zfac*zdqs - zfac*zqsice[jk] + zqx[jk + + klev*(4)]; + zlcond2 = fmin(zlcond2, zlcondlim); + } + zlcond2 = fmax(zlcond2, (double) 0.0); + + if (zlcond2 < (*yrecldp).rlmin || ((double) 1.0 - za[jk]) < zepsec) { + zlcond2 = (double) 0.0; + zacond = (double) 0.0; + } + if (zlcond2 == (double) 0.0) { + zacond = (double) 0.0; + } + + // Large-scale generation is LINEAR in A and LINEAR in L + zsolac = zsolac + zacond; //linear + + //------------------------------------------------------------------------ + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------ + if (ztp1[jk] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond2; + zqxfg[0] = zqxfg[0] + zlcond2; + } else { + // homogeneous freezing + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond2; + zqxfg[1] = zqxfg[1] + zlcond2; + } + + } + } + + //---------------------------------------------------------------------- + // 3.7 Growth of ice by vapour deposition + //---------------------------------------------------------------------- + // Following Rotstayn et al. 2001: + // does not use the ice nuclei number from cloudaer.F90 + // but rather a simple Meyers et al. 1992 form based on the + // supersaturation and assuming clouds are saturated with + // respect to liquid water (well mixed), (or Koop adjustment) + // Growth considered as sink of liquid water if present so + // Bergeron-Findeisen adjustment in autoconversion term no longer needed + //---------------------------------------------------------------------- + + //-------------------------------------------------------- + //- + //- Ice deposition following Rotstayn et al. (2001) + //- (monodisperse ice particle size distribution) + //- + //-------------------------------------------------------- + if (idepice == 1) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[-1 + jk] < (*yrecldp).rcldtopcf && za[jk] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq - + (double) 0.639); + + //------------------------------------------------ + // 2.4e-2 is conductivity of air + // 8.8 = 700**1/3 = density of ice to the third + //------------------------------------------------ + zadd = + rlstt*(rlstt / (rv*ztp1[jk]) - (double) 1.0) / ((double) 2.4E-2*ztp1[jk]); + zbdd = rv*ztp1[jk]*pap[jl + klon*(jk + klev*(ibl))] / ((double) + 2.21*zvpice); + zcvds = (double) 7.8*(pow((zicenuclei / zrho), (double) 0.666))*(zvpliq - + zvpice) / ((double) 8.87*(zadd + zbdd)*zvpice); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + //------------------ + // new value of ice: + //------------------ + zinew = pow(((double) 0.666*zcvds*ptsphy + (pow(zice0, (double) 0.666))), + (double) 1.5); + + //--------------------------- + // grid-mean deposition rate: + //--------------------------- + zdepos = fmax(za[jk]*(zinew - zice0), (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + + } + + //-------------------------------------------------------- + //- + //- Ice deposition assuming ice PSD + //- + //-------------------------------------------------------- + } else if (idepice == 2) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[-1 + jk] < (*yrecldp).rcldtopcf && za[jk] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq - + (double) 0.639); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + // Particle size distribution + ztcg = (double) 1.0; + zfacx1i = (double) 1.0; + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk] + + pap[jl + klon*(jk + klev*(ibl))]*(*yrecldp).rcl_apb3*(pow(ztp1[jk], + (double) 3.)); + zcorrfac = pow(((double) 1.0 / zrho), (double) 0.5); + zcorrfac2 = (pow((ztp1[jk] / (double) 273.0), (double) 1.5))*((double) 393.0 / + (ztp1[jk] + (double) 120.0)); + + zpr02 = zrho*zice0*(*yrecldp).rcl_const1i / (ztcg*zfacx1i); + + zterm1 = (zvpliq - zvpice)*(pow(ztp1[jk], (double) 2.0)) + *zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2i*zfacx1i / (zrho*zaplusb*zvpice) + ; + zterm2 = (double) 0.65*(*yrecldp).rcl_const6i*(pow(zpr02, (*yrecldp) + .rcl_const4i)) + (*yrecldp).rcl_const3i*(pow(zcorrfac, (double) 0.5)) + *(pow(zrho, (double) 0.5))*(pow(zpr02, (*yrecldp).rcl_const5i)) / + (pow(zcorrfac2, (double) 0.5)); + + zdepos = fmax(za[jk]*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + } + + } + // on IDEPICE + + //###################################################################### + // 4 *** PRECIPITATION PROCESSES *** + //###################################################################### + + //---------------------------------- + // revise in-cloud consensate amount + //---------------------------------- + ztmpa = (double) 1.0 / fmax(za[jk], zepsec); + zliqcld = zqxfg[0]*ztmpa; + zicecld = zqxfg[1]*ztmpa; + zlicld = zliqcld + zicecld; + + //---------------------------------------------------------------------- + // 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + // now that rain, snow, graupel species are prognostic + // the precipitation flux can be defined directly level by level + // There is no vertical memory required from the flux variable + //---------------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm] || jm + 1 == 2) { + //------------------------ + // source from layer above + //------------------------ + if (jk + 1 > (*yrecldp).ncldtop) { + zfallsrce[jm] = zpfplsx[jk + (klev + 1)*jm]*zdtgdp; + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zfallsrce[jm]; + zqxfg[jm] = zqxfg[jm] + zfallsrce[jm]; + // use first guess precip----------V + zqpretot = zqpretot + zqxfg[jm]; + } + //------------------------------------------------- + // sink to next layer, constant fall speed + //------------------------------------------------- + // if aerosol effect then override + // note that for T>233K this is the same as above. + if ((*yrecldp).laericesed && jm + 1 == 2) { + zre_ice = pre_ice[jl + klon*(jk + klev*(ibl))]; + // The exponent value is from + // Morrison et al. JAS 2005 Appendix + zvqx[1] = (double) 0.002*(pow(zre_ice, (double) 1.0)); + } + zfall = zvqx[jm]*zrho; + //------------------------------------------------- + // modified by Heymsfield and Iaquinta JAS 2000 + //------------------------------------------------- + // ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + // &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm] = zdtgdp*zfall; + // Cloud budget diagnostic stored at end as implicit + // jl + } + // LLFALL + } + // jm + + //--------------------------------------------------------------- + // Precip cover overlap using MAX-RAN Overlap + // Since precipitation is now prognostic we must + // 1) apply an arbitrary minimum coverage (0.3) if precip>0 + // 2) abandon the 2-flux clr/cld treatment + // 3) Thus, since we have no memory of the clear sky precip + // fraction, we mimic the previous method by reducing + // ZCOVPTOT(JL), which has the memory, proportionally with + // the precip evaporation rate, taking cloud fraction + // into account + // #3 above leads to much smoother vertical profiles of + // precipitation fraction than the Klein-Jakob scheme which + // monotonically increases precip fraction and then resets + // it to zero in a step function once clear-sky precip reaches + // zero. + //--------------------------------------------------------------- + if (zqpretot > zepsec) { + zcovptot = (double) 1.0 - (((double) 1.0 - zcovptot)*((double) 1.0 - fmax(za[jk], + za[-1 + jk])) / ((double) 1.0 - fmin(za[-1 + jk], (double) 1.0 - (double) + 1.E-06))); + zcovptot = fmax(zcovptot, (*yrecldp).rcovpmin); + zcovpclr = fmax((double) 0.0, zcovptot - za[jk]); // clear sky proportion + zraincld = zqxfg[2] / zcovptot; + zsnowcld = zqxfg[3] / zcovptot; + zcovpmax = fmax(zcovptot, zcovpmax); + } else { + zraincld = (double) 0.0; + zsnowcld = (double) 0.0; + zcovptot = (double) 0.0; // no flux - reset cover + zcovpclr = (double) 0.0; // reset clear sky proportion + zcovpmax = (double) 0.0; // reset max cover for ZZRH calc + } + + //---------------------------------------------------------------------- + // 4.3a AUTOCONVERSION TO SNOW + //---------------------------------------------------------------------- + + if (ztp1[jk] <= rtt) { + //----------------------------------------------------- + // Snow Autoconversion rate follow Lin et al. 1983 + //----------------------------------------------------- + if (zicecld > zepsec) { + + zzco = ptsphy*(*yrecldp).rsnowlin1*exp((*yrecldp).rsnowlin2*(ztp1[jk] - rtt)); + + if ((*yrecldp).laericeauto) { + zlcrit = picrit_aer[jl + klon*(jk + klev*(ibl))]; + // 0.3 = N**0.333 with N=0.027 + zzco = zzco*(pow(((*yrecldp).rnice / pnice[jl + klon*(jk + klev*( + ibl))]), (double) 0.333)); + } else { + zlcrit = (*yrecldp).rlcritsnow; + } + + zsnowaut = zzco*((double) 1.0 - exp(-(pow((zicecld / zlcrit), 2)))); + zsolqb[3 + 5*(1)] = zsolqb[3 + 5*(1)] + zsnowaut; + + } + } + + //---------------------------------------------------------------------- + // 4.3b AUTOCONVERSION WARM CLOUDS + // Collection and accretion will require separate treatment + // but for now we keep this simple treatment + //---------------------------------------------------------------------- + + if (zliqcld > zepsec) { + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Sundqvist (1989) + //- + //-------------------------------------------------------- + if (iwarmrain == 1) { + + zzco = (*yrecldp).rkconv*ptsphy; + + if ((*yrecldp).laerliqautolsp) { + zlcrit = plcrit_aer[jl + klon*(jk + klev*(ibl))]; + // 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*(ibl) + )]), (double) 0.333)); + } else { + // Modify autoconversion threshold dependent on: + // land (polluted, high CCN, smaller droplets, higher threshold) + // sea (clean, low CCN, larger droplets, lower threshold) + if (plsm[jl + klon*(ibl)] > (double) 0.5) { + zlcrit = (*yrecldp).rclcrit_land; // land + } else { + zlcrit = (*yrecldp).rclcrit_sea; // ocean + } + } + + //------------------------------------------------------------------ + // Parameters for cloud collection by rain and snow. + // Note that with new prognostic variable it is now possible + // to REPLACE this with an explicit collection parametrization + //------------------------------------------------------------------ + zprecip = (zpfplsx[jk + (klev + 1)*(3)] + zpfplsx[jk + (klev + 1)*(2) + ]) / fmax(zepsec, zcovptot); + zcfpr = (double) 1.0 + (*yrecldp).rprc1*sqrt(fmax(zprecip, (double) 0.0)); + // ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + // &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if ((*yrecldp).laerliqcoll) { + // 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*( + ibl))]), (double) 0.333)); + } + + zzco = zzco*zcfpr; + zlcrit = zlcrit / fmax(zcfpr, zepsec); + + if (zliqcld / zlcrit < (double) 20.0) { + // Security for exp for some compilers + zrainaut = zzco*((double) 1.0 - exp(-(pow((zliqcld / zlcrit), 2)))); + } else { + zrainaut = zzco; + } + + // rain freezes instantly + if (ztp1[jk] <= rtt) { + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zrainaut; + } else { + zsolqb[2 + 5*(0)] = zsolqb[2 + 5*(0)] + zrainaut; + } + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Khairoutdinov and Kogan (2000) + //- + //-------------------------------------------------------- + } else if (iwarmrain == 2) { + + if (plsm[jl + klon*(ibl)] > (double) 0.5) { + // land + zconst = (*yrecldp).rcl_kk_cloud_num_land; + zlcrit = (*yrecldp).rclcrit_land; + } else { + // ocean + zconst = (*yrecldp).rcl_kk_cloud_num_sea; + zlcrit = (*yrecldp).rclcrit_sea; + } + + if (zliqcld > zlcrit) { + + zrainaut = (double) 1.5*za[jk]*ptsphy*(*yrecldp).rcl_kkaau*(pow(zliqcld, + (*yrecldp).rcl_kkbauq))*(pow(zconst, (*yrecldp).rcl_kkbaun)); + + zrainaut = fmin(zrainaut, zqxfg[0]); + if (zrainaut < zepsec) { + zrainaut = (double) 0.0; + } + + zrainacc = (double) 2.0*za[jk]*ptsphy*(*yrecldp) + .rcl_kkaac*(pow((zliqcld*zraincld), (*yrecldp).rcl_kkbac)); + + zrainacc = fmin(zrainacc, zqxfg[0]); + if (zrainacc < zepsec) { + zrainacc = (double) 0.0; + } + + } else { + zrainaut = (double) 0.0; + zrainacc = (double) 0.0; + } + + // If temperature < 0, then autoconversion produces snow rather than rain + // Explicit + if (ztp1[jk] <= rtt) { + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainaut; + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainaut; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainacc; + } else { + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainaut; + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainaut; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainacc; + } + + } + // on IWARMRAIN + + } + // on ZLIQCLD > ZEPSEC + + + //---------------------------------------------------------------------- + // RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + // only active if T<0degC and supercooled liquid water is present + // AND if not Sundquist autoconversion (as this includes riming) + //---------------------------------------------------------------------- + if (iwarmrain > 1) { + + if (ztp1[jk] <= rtt && zliqcld > zepsec) { + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), (double) 0.4); + + //------------------------------------------------------------------ + // Riming of snow by cloud water - implicit in lwc + //------------------------------------------------------------------ + if (zsnowcld > zepsec && zcovptot > (double) 0.01) { + + // Calculate riming term + // Factor of liq water taken out because implicit + zsnowrime = (double) 0.3*zcovptot*ptsphy*(*yrecldp) + .rcl_const7s*zfallcorr*(pow((zrho*zsnowcld*(*yrecldp).rcl_const1s), + (*yrecldp).rcl_const8s)); + + // Limit snow riming term + zsnowrime = fmin(zsnowrime, (double) 1.0); + + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zsnowrime; + + } + + //------------------------------------------------------------------ + // Riming of ice by cloud water - implicit in lwc + // NOT YET ACTIVE + //------------------------------------------------------------------ + // IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + // + // ! Calculate riming term + // ! Factor of liq water taken out because implicit + // ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + // & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + // + // ! Limit ice riming term + // ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + // + // ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + // + // ENDIF + } + + } + // on IWARMRAIN > 1 + + + //---------------------------------------------------------------------- + // 4.4a MELTING OF SNOW and ICE + // with new implicit solver this also has to treat snow or ice + // precipitating from the level above... i.e. local ice AND flux. + // in situ ice and snow: could arise from LS advection or warming + // falling ice and snow: arrives by precipitation process + //---------------------------------------------------------------------- + + zicetot = zqxfg[1] + zqxfg[3]; + zmeltmax = (double) 0.0; + + // If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if (zicetot > zepsec && ztp1[jk] > rtt) { + + // Calculate subsaturation + zsubsat = fmax(zqsice[jk] - zqx[jk + klev*(4)], (double) 0.0); + + // Calculate difference between dry-bulb (ZTP1) and the temperature + // at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + // Melting only occurs if the wet-bulb temperature >0 + // i.e. warming of ice particle due to melting > cooling + // due to evaporation. + ztdmtw0 = ztp1[jk] - rtt - zsubsat*(ztw1 + ztw2*(pap[jl + klon*(jk + + klev*(ibl))] - ztw3) - ztw4*(ztp1[jk] - ztw5)); + // Not implicit yet... + // Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*ztdmtw0) / (*yrecldp).rtaumel); + zmeltmax = fmax(ztdmtw0*zcons1*zrldcp, (double) 0.0); + } + + // Loop over frozen hydrometeors (ice, snow) + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (iphase[jm] == 2) { + jn = imelt[jm]; + if (zmeltmax > zepsec && zicetot > zepsec) { + // Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm] / zicetot; + zmelt = fmin(zqxfg[jm], zalfa*zmeltmax); + // needed in first guess + // This implies that zqpretot has to be recalculated below + // since is not conserved here if ice falls and liquid doesn't + zqxfg[jm] = zqxfg[jm] - zmelt; + zqxfg[-1 + jn] = zqxfg[-1 + jn] + zmelt; + zsolqa[-1 + jn + 5*jm] = zsolqa[-1 + jn + 5*jm] + zmelt; + zsolqa[jm + 5*(-1 + jn)] = zsolqa[jm + 5*(-1 + jn)] - zmelt; + } + } + } + + //---------------------------------------------------------------------- + // 4.4b FREEZING of RAIN + //---------------------------------------------------------------------- + + // If rain present + if (zqx[jk + klev*(2)] > zepsec) { + + if (ztp1[jk] <= rtt && ztp1[-1 + jk] > rtt) { + // Base of melting layer/top of refreezing layer so + // store rain/snow fraction for precip type diagnosis + // If mostly rain, then supercooled rain slow to freeze + // otherwise faster to freeze (snow or ice pellets) + zqpretot = fmax(zqx[jk + klev*(3)] + zqx[jk + klev*(2)], zepsec); + prainfrac_toprfz[jl + klon*(ibl)] = + zqx[jk + klev*(2)] / zqpretot; + if (prainfrac_toprfz[jl + klon*(ibl)] > 0.8) { + llrainliq = true; + } else { + llrainliq = false; + } + } + + // If temperature less than zero + if (ztp1[jk] < rtt) { + + if (prainfrac_toprfz[jl + klon*(ibl)] > 0.8) { + + // Majority of raindrops completely melted + // Refreezing is by slow heterogeneous freezing + + // Slope of rain particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zqx[jk + klev*(2)])), + (*yrecldp).rcl_fac2); + + // Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = (*yrecldp).rcl_fzrab*(ztp1[jk] - rtt); + zfrz = ptsphy*((*yrecldp).rcl_const5r / zrho)*(exp(ztemp) - (double) 1.) + *(pow(zlambda, (*yrecldp).rcl_const6r)); + zfrzmax = fmax(zfrz, (double) 0.0); + + } else { + + // Majority of raindrops only partially melted + // Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*(rtt - ztp1[jk])) / + (*yrecldp).rtaumel); + zfrzmax = fmax((rtt - ztp1[jk])*zcons1*zrldcp, (double) 0.0); + + } + + if (zfrzmax > zepsec) { + zfrz = fmin(zqx[jk + klev*(2)], zfrzmax); + zsolqa[3 + 5*(2)] = zsolqa[3 + 5*(2)] + zfrz; + zsolqa[2 + 5*(3)] = zsolqa[2 + 5*(3)] - zfrz; + } + } + + } + + + //---------------------------------------------------------------------- + // 4.4c FREEZING of LIQUID + //---------------------------------------------------------------------- + // not implicit yet... + zfrzmax = fmax(((*yrecldp).rthomo - ztp1[jk])*zrldcp, (double) 0.0); + + jm = 1; + jn = imelt[-1 + jm]; + if (zfrzmax > zepsec && zqxfg[-1 + jm] > zepsec) { + zfrz = fmin(zqxfg[-1 + jm], zfrzmax); + zsolqa[-1 + jn + 5*(-1 + jm)] = zsolqa[-1 + jn + 5*(-1 + jm)] + zfrz; + zsolqa[-1 + jm + 5*(-1 + jn)] = zsolqa[-1 + jm + 5*(-1 + jn)] - zfrz; + } + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF RAIN/SNOW + //---------------------------------------------------------------------- + + //---------------------------------------- + // Rain evaporation scheme from Sundquist + //---------------------------------------- + if (ievaprain == 1) { + + // Rain + + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsliq[jk]) / fmax(zepsec, (double) 1.0 - + za[jk]); + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsliq[jk])); + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq[jk]; + + if (llo1) { + // note: zpreclr is a rain flux + zpreclr = zqxfg[2]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*(ibl))] / paph[jl + + klon*(klev + (klev + 1)*(ibl))]) / (*yrecldp).rvrfactor*zpreclr / + fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(double) 0.5*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsliq; + zdpr = zcovpclr*zbeta*(zqsliq[jk] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Evaporate rain + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + + //--------------------------------------------------------- + // Rain evaporation scheme based on Abel and Boutle (2013) + //--------------------------------------------------------- + } else if (ievaprain == 2) { + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for rain evaporation + // to avoid cloud formation and saturation of the grid box + //----------------------------------------------------------------------- + // Limit RH for rain evaporation dependent on precipitation fraction + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + // Critical relative humidity + //ZRHC=RAMID + //ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + //IF(ZSIGK > 0.8_JPRB) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + //ENDIF + //ZZRH = MIN(ZRHC,ZZRH) + + // Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = fmin((double) 0.8, zzrh); + + zqe = fmax((double) 0.0, fmin(zqx[jk + klev*(4)], zqsliq[jk])); + + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq[jk]; + + if (llo1) { + + //------------------------------------------- + // Abel and Boutle (2012) evaporation + //------------------------------------------- + // Calculate local precipitation (kg/kg) + zpreclr = zqxfg[2] / zcovptot; + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), 0.4); + + // Saturation vapour pressure with respect to liquid phase + zesatliq = rv / rd*((double)(r2es*exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))); + + // Slope of particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zpreclr)), (*yrecldp).rcl_fac2); // ZPRECLR=kg/kg + + zevap_denom = (*yrecldp).rcl_cdenom1*zesatliq - (*yrecldp) + .rcl_cdenom2*ztp1[jk]*zesatliq + (*yrecldp).rcl_cdenom3*(pow(ztp1[jk], + (double) 3.))*pap[jl + klon*(jk + klev*(ibl))]; + + // Temperature dependent conductivity + zcorr2 = (pow((ztp1[jk] / (double) 273.), (double) 1.5))*(double) 393. / + (ztp1[jk] + (double) 120.); + zka = (*yrecldp).rcl_ka273*zcorr2; + + zsubsat = fmax(zzrh*zqsliq[jk] - zqe, (double) 0.0); + + zbeta = ((double) 0.5 / zqsliq[jk])*(pow(ztp1[jk], (double) 2.)) + *zesatliq*(*yrecldp).rcl_const1r*(zcorr2 / zevap_denom)*((double) 0.78 / + (pow(zlambda, (*yrecldp).rcl_const4r)) + (*yrecldp) + .rcl_const2r*(pow((zrho*zfallcorr), (double) 0.5)) / ((pow(zcorr2, (double) + 0.5))*(pow(zlambda, (*yrecldp).rcl_const3r)))); + + zdenom = (double) 1.0 + zbeta*ptsphy; //*ZCORQSLIQ(JL) + zdpevap = zcovpclr*zbeta*ptsphy*zsubsat / zdenom; + + //--------------------------------------------------------- + // Add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Limit rain evaporation + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + } + // on IEVAPRAIN + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF SNOW + //---------------------------------------------------------------------- + // Snow + if (ievapsnow == 1) { + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / fmax(zepsec, (double) 1.0 - + za[jk]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice[jk])); + llo1 = zcovpclr > zepsec && zqxfg[3] > zepsec && zqe < zzrh*zqsice[jk]; + + if (llo1) { + // note: zpreclr is a rain flux a + zpreclr = zqxfg[3]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*(ibl))] / paph[jl + + klon*(klev + (klev + 1)*(ibl))]) / (*yrecldp).rvrfactor*zpreclr / + fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsice; + zdpr = zcovpclr*zbeta*(zqsice[jk] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce snow to zero and model + // produces small amounts of snowfall everywhere. + //--------------------------------------------------------- + + // Evaporate snow + zevap = fmin(zdpevap, zqxfg[3]); + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqxfg[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + //--------------------------------------------------------- + } else if (ievapsnow == 2) { + + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for snow evaporation + //----------------------------------------------------------------------- + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / fmax(zepsec, (double) 1.0 - + za[jk]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice[jk])); + llo1 = + zcovpclr > zepsec && zqx[jk + klev*(3)] > zepsec && zqe < zzrh*zqsice[jk]; + + if (llo1) { + + // Calculate local precipitation (kg/kg) + zpreclr = zqx[jk + klev*(3)] / zcovptot; + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))*rv / rd; + + // Particle size distribution + // ZTCG increases Ni with colder temperatures - essentially a + // Fletcher or Meyers scheme? + ztcg = (double) 1.0; //v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + // ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = (double) 1.0; //v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk] + + pap[jl + klon*(jk + klev*(ibl))]*(*yrecldp).rcl_apb3*(pow(ztp1[jk], + 3)); + zcorrfac = pow((1.0 / zrho), 0.5); + zcorrfac2 = (pow((ztp1[jk] / 273.0), 1.5))*(393.0 / (ztp1[jk] + 120.0)); + + zpr02 = zrho*zpreclr*(*yrecldp).rcl_const1s / (ztcg*zfacx1s); + + zterm1 = (zqsice[jk] - zqe)*(pow(ztp1[jk], 2))*zvpice*zcorrfac2*ztcg*(*yrecldp) + .rcl_const2s*zfacx1s / (zrho*zaplusb*zqsice[jk]); + zterm2 = 0.65*(*yrecldp).rcl_const6s*(pow(zpr02, (*yrecldp).rcl_const4s)) + + (*yrecldp).rcl_const3s*(pow(zcorrfac, 0.5))*(pow(zrho, 0.5))*(pow(zpr02, + (*yrecldp).rcl_const5s)) / (pow(zcorrfac2, 0.5)); + + zdpevap = fmax(zcovpclr*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit evaporation to snow amount + //-------------------------------------------------------------------- + zevap = fmin(zdpevap, zevaplimice); + zevap = fmin(zevap, zqx[jk + klev*(3)]); + + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqx[jk + klev*(3)])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + + } + // on IEVAPSNOW + + //-------------------------------------- + // Evaporate small precipitation amounts + //-------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm]) { + if (zqxfg[jm] < (*yrecldp).rlmin) { + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zqxfg[jm]; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zqxfg[jm]; + } + } + } + + //###################################################################### + // 5.0 *** SOLVERS FOR A AND L *** + // now use an implicit solution rather than exact solution + // solver is forward in time, upstream difference for advection + //###################################################################### + + //--------------------------- + // 5.1 solver for cloud cover + //--------------------------- + zanew = (za[jk] + zsolac) / ((double) 1.0 + zsolab); + zanew = fmin(zanew, (double) 1.0); + if (zanew < (*yrecldp).ramin) { + zanew = (double) 0.0; + } + zda = zanew - zaorig[jk]; + //--------------------------------- + // variables needed for next level + //--------------------------------- + zanewm1 = zanew; + + //-------------------------------- + // 5.2 solver for the microphysics + //-------------------------------- + + //-------------------------------------------------------------- + // Truncate explicit sinks to avoid negatives + // Note: Species are treated in the order in which they run out + // since the clipping will alter the balance for the other vars + //-------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + llindex3[jn + 5*jm] = false; + } + zsinksum[jm] = (double) 0.0; + } + + //---------------------------- + // collect sink terms and mark + //---------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsinksum[jm] = zsinksum[jm] - zsolqa[jm + 5*jn]; // +ve total is bad + } + } + + //--------------------------------------- + // calculate overshoot and scaling factor + //--------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zmax = fmax(zqx[jk + klev*jm], zepsec); + zrat = fmax(zsinksum[jm], zmax); + zratio[jm] = zmax / zrat; + } + + //-------------------------------------------- + // scale the sink terms, in the correct order, + // recalculating the scale factor each time + //-------------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zsinksum[jm] = (double) 0.0; + } + + //---------------- + // recalculate sum + //---------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + psum_solqa = 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + psum_solqa = psum_solqa + zsolqa[jm + 5*jn]; + } + // ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm] = zsinksum[jm] - psum_solqa; + //--------------------------- + // recalculate scaling factor + //--------------------------- + zmm = fmax(zqx[jk + klev*jm], zepsec); + zrr = fmax(zsinksum[jm], zmm); + zratio[jm] = zmm / zrr; + //------ + // scale + //------ + zzratio = zratio[jm]; + //DIR$ IVDEP + //DIR$ PREFERVECTOR + for (jn = 0; jn <= 5 + -1; jn += 1) { + if (zsolqa[jm + 5*jn] < (double) 0.0) { + zsolqa[jm + 5*jn] = zsolqa[jm + 5*jn]*zzratio; + zsolqa[jn + 5*jm] = zsolqa[jn + 5*jm]*zzratio; + } + } + } + + //-------------------------------------------------------------- + // 5.2.2 Solver + //------------------------ + + //------------------------ + // set the LHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + //---------------------------------------------- + // diagonals: microphysical sink terms+transport + //---------------------------------------------- + if (jn + 1 == jm + 1) { + zqlhs[jn + 5*jm] = (double) 1.0 + zfallsink[jm]; + for (jo = 0; jo <= 5 + -1; jo += 1) { + zqlhs[jn + 5*jm] = zqlhs[jn + 5*jm] + zsolqb[jo + 5*jn]; + } + //------------------------------------------ + // non-diagonals: microphysical source terms + //------------------------------------------ + } else { + zqlhs[jn + 5*jm] = -zsolqb[jn + 5*jm]; // here is the delta T - missing from doc. + } + } + } + + //------------------------ + // set the RHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + //--------------------------------- + // sum the explicit source and sink + //--------------------------------- + zexplicit = (double) 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + zexplicit = zexplicit + zsolqa[jm + 5*jn]; // sum over middle index + } + zqxn[jm] = zqx[jk + klev*jm] + zexplicit; + } + + //----------------------------------- + // *** solve by LU decomposition: *** + //----------------------------------- + + // Note: This fast way of solving NCLVxNCLV system + // assumes a good behaviour (i.e. non-zero diagonal + // terms with comparable orders) of the matrix stored + // in ZQLHS. For the moment this is the case but + // be aware to preserve it when doing eventual + // modifications. + + // Non pivoting recursive factorization + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + // number of steps + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + // row index + zqlhs[jm + 5*jn] = zqlhs[jm + 5*jn] / zqlhs[jn + 5*jn]; + for (ik = jn + 1; ik <= 5 + -1; ik += 1) { + // column index + zqlhs[jm + 5*ik] = zqlhs[jm + 5*ik] - zqlhs[jm + 5*jn]*zqlhs[jn + 5*ik]; + } + } + } + + // Backsubstitution + // step 1 + for (jn = 1; jn <= 5 + -1; jn += 1) { + for (jm = 0; jm <= jn + 1 - 1 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + } + // step 2 + zqxn[4] = zqxn[4] / zqlhs[4 + 5*(4)]; + for (jn = -2 + 5; jn >= 1 + -1; jn += -1) { + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + zqxn[jn] = zqxn[jn] / zqlhs[jn + 5*jn]; + } + + // Ensure no small values (including negatives) remain in cloud variables nor + // precipitation rates. + // Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + if (zqxn[jn] < zepsec) { + zqxn[4] = zqxn[4] + zqxn[jn]; + zqxn[jn] = (double) 0.0; + } + } + + //-------------------------------- + // variables needed for next level + //-------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxnm1[jm] = zqxn[jm]; + zqxn2d[jk + klev*jm] = zqxn[jm]; + } + + //------------------------------------------------------------------------ + // 5.3 Precipitation/sedimentation fluxes to next level + // diagnostic precipitation fluxes + // It is this scaled flux that must be used for source to next layer + //------------------------------------------------------------------------ + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[1 + jk + (klev + 1)*jm] = zfallsink[jm]*zqxn[jm]*zrdtgdp; + } + + // Ensure precipitation fraction is zero if no precipitation + zqpretot = + zpfplsx[1 + jk + (klev + 1)*(3)] + zpfplsx[1 + jk + (klev + 1)*(2)]; + if (zqpretot < zepsec) { + zcovptot = (double) 0.0; + } + + //###################################################################### + // 6 *** UPDATE TENDANCIES *** + //###################################################################### + + //-------------------------------- + // 6.1 Temperature and CLV budgets + //-------------------------------- + + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + + // calculate fluxes in and out of box for conservation of TL + zfluxq[jm] = zpsupsatsrce[jm] + zconvsrce[jm] + zfallsrce[jm] - (zfallsink[jm] + + zconvsink[jm])*zqxn[jm]; + + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = tendency_loc_t[jl + + klon*(jk + klev*(ibl))] + ralvdcp*(zqxn[jm] - zqx[jk + klev*jm] - + zfluxq[jm])*zqtmst; + } + + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = tendency_loc_t[jl + + klon*(jk + klev*(ibl))] + ralsdcp*(zqxn[jm] - zqx[jk + klev*jm] - + zfluxq[jm])*zqtmst; + } + + //---------------------------------------------------------------------- + // New prognostic tendencies - ice,liquid rain,snow + // Note: CLV arrays use PCLV in calculation of tendency while humidity + // uses ZQX. This is due to clipping at start of cloudsc which + // include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + //---------------------------------------------------------------------- + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))] = + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))] + (zqxn[jm] - + zqx0[jk + klev*jm])*zqtmst; + + } + + //---------------------- + // 6.2 Humidity budget + //---------------------- + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = tendency_loc_q[jl + + klon*(jk + klev*(ibl))] + (zqxn[4] - zqx[jk + klev*(4)])*zqtmst; + + //------------------- + // 6.3 cloud cover + //----------------------- + tendency_loc_a[jl + klon*(jk + klev*(ibl))] = + tendency_loc_a[jl + klon*(jk + klev*(ibl))] + zda*zqtmst; + + //-------------------------------------------------- + // Copy precipitation fraction into output variable + //------------------------------------------------- + pcovptot[jl + klon*(jk + klev*(ibl))] = zcovptot; + + } + // on vertical level JK + //---------------------------------------------------------------------- + // END OF VERTICAL LOOP + //---------------------------------------------------------------------- + + //###################################################################### + // 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + //###################################################################### + + //-------------------------------------------------------------------- + // Copy general precip arrays back into PFP arrays for GRIB archiving + // Add rain and liquid fluxes, ice and snow fluxes + //-------------------------------------------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfplsl[jl + klon*(jk + (klev + 1)*(ibl))] = + zpfplsx[jk + (klev + 1)*(2)] + zpfplsx[jk + (klev + 1)*(0)]; + pfplsn[jl + klon*(jk + (klev + 1)*(ibl))] = + zpfplsx[jk + (klev + 1)*(3)] + zpfplsx[jk + (klev + 1)*(1)]; + } + + //-------- + // Fluxes: + //-------- + pfsqlf[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqif[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqrf[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqsf[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfcqlng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfcqnng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfcqrng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; //rain + pfcqsng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; //snow + // fluxes due to turbulence + pfsqltur[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqitur[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + + for (jk = 0; jk <= klev + -1; jk += 1) { + + zgdph_r = -zrg_r*(paph[jl + klon*(1 + jk + (klev + 1)*(ibl))] - paph[ + jl + klon*(jk + (klev + 1)*(ibl))])*zqtmst; + pfsqlf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqlf[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqif[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqif[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqrf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqlf[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqsf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqif[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqlng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqlng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqnng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqnng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqrng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqlng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqsng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqnng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqltur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqltur[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqitur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqitur[jl + klon*(jk + (klev + 1)*(ibl))]; + + zalfaw = zfoealfa[jk]; + + // Liquid , LS scheme minus detrainment + pfsqlf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqlf[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(0)] - zqx0[jk + klev*(-1 + + 1)] + pvfl[jl + klon*(jk + klev*(ibl))]*ptsphy - zalfaw*plude[ + jl + klon*(jk + klev*(ibl))])*zgdph_r; + // liquid, negative numbers + pfcqlng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqlng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(0)]*zgdph_r; + + // liquid, vertical diffusion + pfsqltur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqltur[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + pvfl[jl + klon*(jk + klev*(ibl + ))]*ptsphy*zgdph_r; + + // Rain, LS scheme + pfsqrf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqrf[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(2)] - zqx0[jk + klev*(-1 + + 3)])*zgdph_r; + // rain, negative numbers + pfcqrng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqrng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(2)]*zgdph_r; + + // Ice , LS scheme minus detrainment + pfsqif[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqif[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(1)] - zqx0[jk + klev*(-1 + + 2)] + pvfi[jl + klon*(jk + klev*(ibl))]*ptsphy - ((double) 1.0 - + zalfaw)*plude[jl + klon*(jk + klev*(ibl))])*zgdph_r; + // ice, negative numbers + pfcqnng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqnng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(1)]*zgdph_r; + + // ice, vertical diffusion + pfsqitur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqitur[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + pvfi[jl + klon*(jk + klev*(ibl + ))]*ptsphy*zgdph_r; + + // snow, LS scheme + pfsqsf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqsf[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(3)] - zqx0[jk + klev*(-1 + + 4)])*zgdph_r; + // snow, negative numbers + pfcqsng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqsng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(3)]*zgdph_r; + } + + //----------------------------------- + // enthalpy flux due to precipitation + //----------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfhpsl[jl + klon*(jk + (klev + 1)*(ibl))] = + -rlvtt*pfplsl[jl + klon*(jk + (klev + 1)*(ibl))]; + pfhpsn[jl + klon*(jk + (klev + 1)*(ibl))] = + -rlstt*pfplsn[jl + klon*(jk + (klev + 1)*(ibl))]; + } +} + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_c.h b/src/cloudsc_hip/cloudsc/cloudsc_c.h new file mode 100644 index 00000000..e4d0364b --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_c.h @@ -0,0 +1,44 @@ +#include "hip/hip_runtime.h" +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "hip/hip_runtime.h" +#include "yoecldp_c.h" +#include + +__global__ void __launch_bounds__(128, 1) cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2); + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_c_hoist.cpp b/src/cloudsc_hip/cloudsc/cloudsc_c_hoist.cpp new file mode 100644 index 00000000..9506d2c6 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_c_hoist.cpp @@ -0,0 +1,2651 @@ +#include "hip/hip_runtime.h" +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#include "cloudsc_c_hoist.h" +#include + +__global__ void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2, + double * __restrict__ zfoealfa, double * __restrict__ ztp1, double * __restrict__ zli, + double * __restrict__ za, double * __restrict__ zaorig, double * __restrict__ zliqfrac, + double * __restrict__ zicefrac, double * __restrict__ zqx, double * __restrict__ zqx0, + double * __restrict__ zpfplsx, double * __restrict__ zlneg, double * __restrict__ zqxn2d, + double * __restrict__ zqsmix, double * __restrict__ zqsliq, double * __restrict__ zqsice, + double * __restrict__ zfoeewmt, double * __restrict__ zfoeew, double * __restrict__ zfoeeliqt) { + + //------------------------------------------------------------------------------- + // Declare input/output arguments + //------------------------------------------------------------------------------- + + // PLCRIT_AER : critical liquid mmr for rain autoconversion process + // PICRIT_AER : critical liquid mmr for snow autoconversion process + // PRE_LIQ : liq Re + // PRE_ICE : ice Re + // PCCN : liquid cloud condensation nuclei + // PNICE : ice number concentration (cf. CCN) + + const int klev = 137; // Number of levels + + double zlcond1, zlcond2, zlevapl, zlevapi, zrainaut, zsnowaut, zliqcld, zicecld; + double zlevap, zleros; + // condensation and evaporation terms + // autoconversion terms + double zfokoop; + //double zfoealfa[klev + 1]; + double zicenuclei; // number concentration of ice nuclei + + double zlicld; + double zacond; + double zaeros; + double zlfinalsum; + double zdqs; + double ztold; + double zqold; + double zdtgdp; + double zrdtgdp; + double ztrpaus; + double zcovpclr; + double zpreclr; + double zcovptot; + double zcovpmax; + double zqpretot; + double zdpevap; + double zdtforc; + double zdtdiab; + //double ztp1[klev]; + double zldefr; + double zldifdt; + double zdtgdpf; + double zlcust[5]; + double zacust; + double zmf; + + double zrho; + double ztmp1, ztmp2, ztmp3; + double ztmp4, ztmp5, ztmp6, ztmp7; + double zalfawm; + + // Accumulators of A,B,and C factors for cloud equations + double zsolab; // -ve implicit CC + double zsolac; // linear CC + double zanew; + double zanewm1; + + double zgdp; + + //---for flux calculation + double zda; + //double zli[klev], za[klev]; + //double zaorig[klev]; // start of scheme value for CC + + int llflag; + int llo1; + + int icall, ik, jk, jl, jm, jn, jo, jlen, is; + + double zdp, zpaphd; + + double zalfa; + // & ZALFACU, ZALFALS + double zalfaw; + double zbeta, zbeta1; + //REAL(KIND=JPRB) :: ZBOTT + double zcfpr; + double zcor; + double zcdmax; + double zmin; + double zlcondlim; + double zdenom; + double zdpmxdt; + double zdpr; + double zdtdp; + double ze; + double zepsec; + double zfac, zfaci, zfacw; + double zgdcp; + double zinew; + double zlcrit; + double zmfdn; + double zprecip; + double zqe; + double zqsat, zqtmst, zrdcp; + double zrhc, zsig, zsigk; + double zwtot; + double zzco, zzdl, zzrh, zzzdt, zqadj; + double zqnew, ztnew; + double zrg_r, zgdph_r, zcons1, zcond, zcons1a; + double zlfinal; + double zmelt; + double zevap; + double zfrz; + double zvpliq, zvpice; + double zadd, zbdd, zcvds, zice0, zdepos; + double zsupsat; + double zfall; + double zre_ice; + double zrldcp; + double zqp1env; + + //---------------------------- + // Arrays for new microphysics + //---------------------------- + int iphase[5]; // marker for water phase of each species + // 0=vapour, 1=liquid, 2=ice + + int imelt[5]; // marks melting linkage for ice categories + // ice->liquid, snow->rain + + int llfall[5]; // marks falling species + // LLFALL=0, cloud cover must > 0 for zqx > 0 + // LLFALL=1, no cloud needed, zqx can evaporate + + int llindex1[5]; // index variable + int llindex3[5 * 5]; // index variable + double zmax; + double zrat; + int iorder[5]; // array for sorting explicit terms + + //double zliqfrac[klev]; // cloud liquid water fraction: ql/(ql+qi) + //double zicefrac[klev]; // cloud ice water fraction: qi/(ql+qi) + double zqxn[5]; // new values for zqx at time+1 + double zqxfg[5]; // first guess values including precip + double zqxnm1[5]; // new values for zqx at time+1 at level above + double zfluxq[5]; // fluxes convergence of species (needed?) + // Keep the following for possible future total water variance scheme? + //REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + //REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + //REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + //REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + //REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + double zmeltmax; + double zfrzmax; + double zicetot; + + + //double zqsmix[klev]; // diagnostic mixed phase saturation + //REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + //double zqsliq[klev]; // liquid water saturation + //double zqsice[klev]; // ice water saturation + + //REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + //REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + //REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + //double zfoeewmt[klev]; + //double zfoeew[klev]; + //double zfoeeliqt[klev]; + //REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + double zdqsliqdt, zdqsicedt, zdqsmixdt; + double zcorqsliq; + double zcorqsice; + //REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + double zcorqsmix; + double zevaplimliq, zevaplimice, zevaplimmix; + + //------------------------------------------------------- + // SOURCE/SINK array for implicit and explicit terms + //------------------------------------------------------- + // a POSITIVE value entered into the arrays is a... + // Source of this variable + // | + // | Sink of this variable + // | | + // V V + // ZSOLQA(JL,IQa,IQb) = explicit terms + // ZSOLQB(JL,IQa,IQb) = implicit terms + // Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + // a source of NCLDQL and a sink of IQV + // put 'magic' source terms such as PLUDE from + // detrainment into explicit source/sink array diagnognal + // ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + // i.e. A positive value is a sink!????? weird... + //------------------------------------------------------- + + double zsolqa[5 * 5]; // explicit sources and sinks + double zsolqb[5 * 5]; // implicit sources and sinks + // e.g. microphysical pathways between ice variables. + double zqlhs[5 * 5]; // n x n matrix storing the LHS of implicit solver + double zvqx[5]; // fall speeds of three categories + double zexplicit; + double zratio[5], zsinksum[5]; + + // for sedimentation source/sink terms + double zfallsink[5]; + double zfallsrce[5]; + + // for convection detrainment source and subsidence source/sink terms + double zconvsrce[5]; + double zconvsink[5]; + + // for supersaturation source term from previous timestep + double zpsupsatsrce[5]; + + // Numerical fit to wet bulb temperature + double ztw1 = (double) 1329.31; + double ztw2 = (double) 0.0074615; + double ztw3 = (double) 0.85E5; + double ztw4 = (double) 40.637; + double ztw5 = (double) 275.0; + + double zsubsat; // Subsaturation for snow melting term + double ztdmtw0; // Diff between dry-bulb temperature and + // temperature when wet-bulb = 0degC + + // Variables for deposition term + double ztcg; // Temperature dependent function for ice PSD + double zfacx1i, zfacx1s; // PSD correction factor + double zaplusb, zcorrfac, zcorrfac2, zpr02, zterm1, zterm2; // for ice dep + double zcldtopdist; // Distance from cloud top + double zinfactor; // No. of ice nuclei factor for deposition + + // Autoconversion/accretion/riming/evaporation + int iwarmrain; + int ievaprain; + int ievapsnow; + int idepice; + double zrainacc; + double zraincld; + double zsnowrime; + double zsnowcld; + double zesatliq; + double zfallcorr; + double zlambda; + double zevap_denom; + double zcorr2; + double zka; + double zconst; + double ztemp; + + // Rain freezing + int llrainliq; // True if majority of raindrops are liquid (no ice core) + + //---------------------------- + // End: new microphysics + //---------------------------- + + //---------------------- + // SCM budget statistics + //---------------------- + double zrain; + + double zhook_handle; + double ztmpl, ztmpi, ztmpa; + + double zmm, zrr; + double zrg; + + double zzsum, zzratio; + double zepsilon; + + double zcond1, zqp; + + double psum_solqa; + int ibl; + int i_llfall_0; + //double zqx[5 * klev]; + //double zqx0[5 * klev]; + //double zpfplsx[5 * (klev + 1)]; + //double zlneg[5 * klev]; + //double zqxn2d[5 * klev]; + + jl = threadIdx.x; + ibl = blockIdx.x; //blockIdx.z; + + + //=============================================================================== + //IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) + + //=============================================================================== + // 0.0 Beginning of timestep book-keeping + //---------------------------------------------------------------------- + + + //###################################################################### + // 0. *** SET UP CONSTANTS *** + //###################################################################### + + zepsilon = (double) 100.*DBL_EPSILON; + + // --------------------------------------------------------------------- + // Set version of warm-rain autoconversion/accretion + // IWARMRAIN = 1 ! Sundquist + // IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + // --------------------------------------------------------------------- + iwarmrain = 2; + // --------------------------------------------------------------------- + // Set version of rain evaporation + // IEVAPRAIN = 1 ! Sundquist + // IEVAPRAIN = 2 ! Abel and Boutle (2013) + // --------------------------------------------------------------------- + ievaprain = 2; + // --------------------------------------------------------------------- + // Set version of snow evaporation + // IEVAPSNOW = 1 ! Sundquist + // IEVAPSNOW = 2 ! New + // --------------------------------------------------------------------- + ievapsnow = 1; + // --------------------------------------------------------------------- + // Set version of ice deposition + // IDEPICE = 1 ! Rotstayn (2001) + // IDEPICE = 2 ! New + // --------------------------------------------------------------------- + idepice = 1; + + // --------------------- + // Some simple constants + // --------------------- + zqtmst = (double) 1.0 / ptsphy; + zgdcp = rg / rcpd; + zrdcp = rd / rcpd; + zcons1a = rcpd / (rlmlt*rg*(*yrecldp).rtaumel); + zepsec = (double) 1.E-14; + zrg_r = (double) 1.0 / rg; + zrldcp = (double) 1.0 / (ralsdcp - ralvdcp); + + // Note: Defined in module/yoecldp.F90 + // NCLDQL=1 ! liquid cloud water + // NCLDQI=2 ! ice cloud water + // NCLDQR=3 ! rain water + // NCLDQS=4 ! snow + // NCLDQV=5 ! vapour + + // ----------------------------------------------- + // Define species phase, 0=vapour, 1=liquid, 2=ice + // ----------------------------------------------- + iphase[4] = 0; + iphase[0] = 1; + iphase[2] = 1; + iphase[1] = 2; + iphase[3] = 2; + + // --------------------------------------------------- + // Set up melting/freezing index, + // if an ice category melts/freezes, where does it go? + // --------------------------------------------------- + imelt[4] = -99; + imelt[0] = 2; + imelt[2] = 4; + imelt[1] = 3; + imelt[3] = 3; + + // ----------------------------------------------- + // INITIALIZATION OF OUTPUT TENDENCIES + // ----------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_a[jl + klon*(jk + klev*ibl)] = (double) 0.0; + } + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; + } + } + + //-- These were uninitialized : meaningful only when we compare error differences + for (jk = 0; jk <= klev + -1; jk += 1) { + pcovptot[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_cld[jl + klon*(jk + klev*(4 + 5*ibl))] = (double) 0.0; + } + + // ------------------------- + // set up fall speeds in m/s + // ------------------------- + zvqx[4] = (double) 0.0; + zvqx[0] = (double) 0.0; + zvqx[1] = (*yrecldp).rvice; + zvqx[2] = (*yrecldp).rvrain; + zvqx[3] = (*yrecldp).rvsnow; + for (i_llfall_0 = 0; i_llfall_0 <= 5 + -1; i_llfall_0 += 1) { + llfall[i_llfall_0] = false; + } + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (zvqx[jm] > (double) 0.0) { + llfall[jm] = true; + } + // falling species + } + // Set LLFALL to false for ice (but ice still sediments!) + // Need to rationalise this at some point + llfall[1] = false; + + + //###################################################################### + // 1. *** INITIAL VALUES FOR VARIABLES *** + //###################################################################### + + + // ---------------------- + // non CLV initialization + // ---------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + ztp1[jl + klon*(jk + klev*ibl)] = + pt[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_t[jl + klon*(jk + klev*ibl)]; + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = + pq[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + zqx0[jl + klon*(jk + klev*(4 + 5*ibl))] = + pq[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + za[jl + klon*(jk + klev*ibl)] = + pa[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + zaorig[jl + klon*(jk + klev*ibl)] = + pa[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + } + + // ------------------------------------- + // initialization for CLV family + // ------------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqx[jl + klon*(jk + klev*(jm + 5*ibl))] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqx0[jl + klon*(jk + klev*(jm + 5*ibl))] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + } + } + + //------------- + // zero arrays + //------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + zpfplsx[jl + klon*(jk + (klev + 1)*(jm + 5*ibl))] = (double) 0.0; // precip fluxes + } + } + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqxn2d[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; // end of timestep values in 2D + zlneg[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; // negative input check + } + } + + prainfrac_toprfz[jl + klon*ibl] = (double) 0.0; // rain fraction at top of refreezing layer + llrainliq = true; // Assume all raindrops are liquid initially + + // ---------------------------------------------------- + // Tidy up very small cloud cover or total cloud water + // ---------------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + if (zqx[jl + klon*(jk + klev*(0 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))] < (*yrecldp).rlmin || + za[jl + klon*(jk + klev*ibl)] < (*yrecldp).ramin) { + + // Evaporate small cloud liquid water amounts + zlneg[jl + klon*(jk + klev*(0 + 5*ibl))] = zlneg[jl + klon*(jk + klev*(0 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + zqadj = zqx[jl + klon*(jk + klev*(0 + 5*ibl))]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + zqx[jl + klon*(jk + klev*(0 + 5*ibl))] = (double) 0.0; + + // Evaporate small cloud ice water amounts + zlneg[jl + klon*(jk + klev*(1 + 5*ibl))] = zlneg[jl + klon*(jk + klev*(1 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + zqadj = zqx[jl + klon*(jk + klev*(1 + 5*ibl))]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + zqx[jl + klon*(jk + klev*(1 + 5*ibl))] = (double) 0.0; + + // Set cloud cover to zero + za[jl + klon*(jk + klev*ibl)] = (double) 0.0; + + } + } + + // --------------------------------- + // Tidy up small CLV variables + // --------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + if (zqx[jl + klon*(jk + klev*(jm + 5*ibl))] < (*yrecldp).rlmin) { + zlneg[jl + klon*(jk + klev*(jm + 5*ibl))] = zlneg[jl + klon*(jk + klev*(jm + 5*ibl))] + + zqx[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqadj = zqx[jl + klon*(jk + klev*(jm + 5*ibl))]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + } + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + } + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqx[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; + } + } + } + + + // ------------------------------ + // Define saturation values + // ------------------------------ + for (jk = 0; jk <= klev + -1; jk += 1) { + //---------------------------------------- + // old *diagnostic* mixed phase saturation + //---------------------------------------- + zfoealfa[jl + klon*(jk + (klev + 1)*ibl)] = + ((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))); + zfoeewmt[jl + klon*(jk + klev*ibl)] = + fmin(((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))) / + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsmix[jl + klon*(jk + klev*ibl)] = zfoeewmt[jl + klon*(jk + klev*ibl)]; + zqsmix[jl + klon*(jk + klev*ibl)] = zqsmix[jl + klon*(jk + klev*ibl)] / + ((double) 1.0 - retv*zqsmix[jl + klon*(jk + klev*ibl)]); + + //--------------------------------------------- + // ice saturation T<273K + // liquid water saturation for T>273K + //--------------------------------------------- + zalfa = ((double)(fmax(0.0, copysign(1.0, ztp1[jl + klon*(jk + klev*ibl)] - rtt)))); + zfoeew[jl + klon*(jk + klev*ibl)] = fmin((zalfa*((double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))) + ((double) 1.0 - zalfa)* + ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))/ + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zfoeew[jl + klon*(jk + klev*ibl)] = fmin((double) 0.5, zfoeew[jl + klon*(jk + klev*ibl)]); + zqsice[jl + klon*(jk + klev*ibl)] = zfoeew[jl + klon*(jk + klev*ibl)] / + ((double) 1.0 - retv*zfoeew[jl + klon*(jk + klev*ibl)]); + + //---------------------------------- + // liquid water saturation + //---------------------------------- + zfoeeliqt[jl + klon*(jk + klev*ibl)] = fmin(((double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))) / pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsliq[jl + klon*(jk + klev*ibl)] = zfoeeliqt[jl + klon*(jk + klev*ibl)]; + zqsliq[jl + klon*(jk + klev*ibl)] = zqsliq[jl + klon*(jk + klev*ibl)] / + ((double) 1.0 - retv*zqsliq[jl + klon*(jk + klev*ibl)]); + + // !---------------------------------- + // ! ice water saturation + // !---------------------------------- + // ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + // ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + // ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + + } + + for (jk = 0; jk <= klev + -1; jk += 1) { + + + //------------------------------------------ + // Ensure cloud fraction is between 0 and 1 + //------------------------------------------ + za[jl + klon*(jk + klev*ibl)] = + fmax((double) 0.0, fmin((double) 1.0, za[jl + klon*(jk + klev*ibl)])); + + //------------------------------------------------------------------- + // Calculate liq/ice fractions (no longer a diagnostic relationship) + //------------------------------------------------------------------- + zli[jl + klon*(jk + klev*ibl)] = zqx[jl + klon*(jk + klev*(0 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + if (zli[jl + klon*(jk + klev*ibl)] > (*yrecldp).rlmin) { + zliqfrac[jl + klon*(jk + klev*ibl)] = + zqx[jl + klon*(jk + klev*(0 + 5*ibl))] / zli[jl + klon*(jk + klev*ibl)]; + zicefrac[jl + klon*(jk + klev*ibl)] = + (double) 1.0 - zliqfrac[jl + klon*(jk + klev*ibl)]; + } else { + zliqfrac[jl + klon*(jk + klev*ibl)] = (double) 0.0; + zicefrac[jl + klon*(jk + klev*ibl)] = (double) 0.0; + } + } + + //###################################################################### + // 2. *** CONSTANTS AND PARAMETERS *** + //###################################################################### + // Calculate L in updrafts of bl-clouds + // Specify QS, P/PS for tropopause (for c2) + // And initialize variables + //------------------------------------------ + + //--------------------------------- + // Find tropopause level (ZTRPAUS) + //--------------------------------- + ztrpaus = (double) 0.1; + zpaphd = (double) 1.0 / paph[jl + klon*(klev + (klev + 1)*ibl)]; + for (jk = 0; jk <= klev - 1 + -1; jk += 1) { + zsig = pap[jl + klon*(jk + klev*ibl)]*zpaphd; + if (zsig > (double) 0.1 && zsig < (double) 0.4 && ztp1[jl + klon*(jk + klev*ibl)] > + ztp1[jl + klon*(1 + jk + klev*ibl)]) { + ztrpaus = zsig; + } + } + + //----------------------------- + // Reset single level variables + //----------------------------- + + zanewm1 = (double) 0.0; + zda = (double) 0.0; + zcovpclr = (double) 0.0; + zcovpmax = (double) 0.0; + zcovptot = (double) 0.0; + zcldtopdist = (double) 0.0; + + //###################################################################### + // 3. *** PHYSICS *** + //###################################################################### + + + //---------------------------------------------------------------------- + // START OF VERTICAL LOOP + //---------------------------------------------------------------------- + + for (jk = -1 + (*yrecldp).ncldtop; jk <= klev + -1; jk += 1) { + + //---------------------------------------------------------------------- + // 3.0 INITIALIZE VARIABLES + //---------------------------------------------------------------------- + + //--------------------------------- + // First guess microphysics + //--------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxfg[jm] = zqx[jl + klon*(jk + klev*(jm + 5*ibl))]; + } + + //--------------------------------- + // Set KLON arrays to zero + //--------------------------------- + + zlicld = (double) 0.0; + zrainaut = (double) 0.0; // currently needed for diags + zrainacc = (double) 0.0; // currently needed for diags + zsnowaut = (double) 0.0; // needed + zldefr = (double) 0.0; + zacust = (double) 0.0; // set later when needed + zqpretot = (double) 0.0; + zlfinalsum = (double) 0.0; + + // Required for first guess call + zlcond1 = (double) 0.0; + zlcond2 = (double) 0.0; + zsupsat = (double) 0.0; + zlevapl = (double) 0.0; + zlevapi = (double) 0.0; + + //------------------------------------- + // solvers for cloud fraction + //------------------------------------- + zsolab = (double) 0.0; + zsolac = (double) 0.0; + + zicetot = (double) 0.0; + + //------------------------------------------ + // reset matrix so missing pathways are set + //------------------------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsolqb[jn + 5*jm] = (double) 0.0; + zsolqa[jn + 5*jm] = (double) 0.0; + } + } + + //---------------------------------- + // reset new microphysics variables + //---------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zfallsrce[jm] = (double) 0.0; + zfallsink[jm] = (double) 0.0; + zconvsrce[jm] = (double) 0.0; + zconvsink[jm] = (double) 0.0; + zpsupsatsrce[jm] = (double) 0.0; + zratio[jm] = (double) 0.0; + } + + + //------------------------- + // derived variables needed + //------------------------- + + zdp = paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - + paph[jl + klon*(jk + (klev + 1)*ibl)]; // dp + zgdp = rg / zdp; // g/dp + zrho = pap[jl + klon*(jk + klev*ibl)] / (rd*ztp1[jl + klon*(jk + klev*ibl)]); // p/RT air density + + zdtgdp = ptsphy*zgdp; // dt g/dp + zrdtgdp = zdp*((double) 1.0 / (ptsphy*rg)); // 1/(dt g/dp) + + if (jk + 1 > 1) { + zdtgdpf = ptsphy*rg / (pap[jl + klon*(jk + klev*ibl)] - + pap[jl + klon*(-1 + jk + klev*ibl)]); + } + + //------------------------------------ + // Calculate dqs/dT correction factor + //------------------------------------ + // Reminder: RETV=RV/RD-1 + + // liquid + zfacw = r5les / (pow((ztp1[jl + klon*(jk + klev*ibl)] - r4les), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeeliqt[jl + klon*(jk + klev*ibl)]); + zdqsliqdt = zfacw*zcor*zqsliq[jl + klon*(jk + klev*ibl)]; + zcorqsliq = (double) 1.0 + ralvdcp*zdqsliqdt; + + // ice + zfaci = r5ies / (pow((ztp1[jl + klon*(jk + klev*ibl)] - r4ies), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeew[jl + klon*(jk + klev*ibl)]); + zdqsicedt = zfaci*zcor*zqsice[jl + klon*(jk + klev*ibl)]; + zcorqsice = (double) 1.0 + ralsdcp*zdqsicedt; + + // diagnostic mixed + zalfaw = zfoealfa[jl + klon*(jk + (klev + 1)*ibl)]; + zalfawm = zalfaw; + zfac = zalfaw*zfacw + ((double) 1.0 - zalfaw)*zfaci; + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeewmt[jl + klon*(jk + klev*ibl)]); + zdqsmixdt = zfac*zcor*zqsmix[jl + klon*(jk + klev*ibl)]; + zcorqsmix = (double) 1.0 + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)* + rtwat_rtice_r, 2)))*ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)* + rtwat_rtice_r, 2))))*ralsdcp))*zdqsmixdt; + + // evaporation/sublimation limits + zevaplimmix = fmax((zqsmix[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]) / zcorqsmix, (double) 0.0); + zevaplimliq = fmax((zqsliq[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]) / zcorqsliq, (double) 0.0); + zevaplimice = fmax((zqsice[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]) / zcorqsice, (double) 0.0); + + //-------------------------------- + // in-cloud consensate amount + //-------------------------------- + ztmpa = (double) 1.0 / fmax(za[jl + klon*(jk + klev*ibl)], zepsec); + zliqcld = zqx[jl + klon*(jk + klev*(0 + 5*ibl))]*ztmpa; + zicecld = zqx[jl + klon*(jk + klev*(1 + 5*ibl))]*ztmpa; + zlicld = zliqcld + zicecld; + + + //------------------------------------------------ + // Evaporate very small amounts of liquid and ice + //------------------------------------------------ + + if (zqx[jl + klon*(jk + klev*(0 + 5*ibl))] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(0)] = zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + zsolqa[0 + 5*(4)] = -zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + } + + if (zqx[jl + klon*(jk + klev*(1 + 5*ibl))] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(1)] = zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + zsolqa[1 + 5*(4)] = -zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + } + + + //--------------------------------------------------------------------- + // 3.1 ICE SUPERSATURATION ADJUSTMENT + //--------------------------------------------------------------------- + // Note that the supersaturation adjustment is made with respect to + // liquid saturation: when T>0C + // ice saturation: when T<0C + // with an adjustment made to allow for ice + // supersaturation in the clear sky + // Note also that the KOOP factor automatically clips the supersaturation + // to a maximum set by the liquid water saturation mixing ratio + // important for temperatures near to but below 0C + //----------------------------------------------------------------------- + + //----------------------------------- + // 3.1.1 Supersaturation limit (from Koop) + //----------------------------------- + // Needs to be set for all temperatures + zfokoop = ((double)(fmin(rkoop1 - rkoop2*ztp1[jl + klon*(jk + klev*ibl)], (double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))*1.0/(double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))); + + if (ztp1[jl + klon*(jk + klev*ibl)] >= rtt || (*yrecldp).nssopt == 0) { + zfac = (double) 1.0; + zfaci = (double) 1.0; + } else { + zfac = za[jl + klon*(jk + klev*ibl)] + zfokoop*((double) 1.0 - za[jl + klon*(jk +klev*ibl)]); + zfaci = ptsphy / (*yrecldp).rkooptau; + } + + //------------------------------------------------------------------- + // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + // correction factor + // [#Note: QSICE or QSLIQ] + //------------------------------------------------------------------- + + // Calculate supersaturation to add to cloud + if (za[jl + klon*(jk + klev*ibl)] > (double) 1.0 - (*yrecldp).ramin) { + zsupsat = fmax((zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - zfac*zqsice[jl + klon*(jk + klev*ibl)]) / + zcorqsice, (double) 0.0); + } else { + // Calculate environmental humidity supersaturation + zqp1env = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - + za[jl + klon*(jk + klev*ibl)]*zqsice[jl + klon*(jk + klev*ibl)]) / + fmax((double) 1.0 - za[jl + klon*(jk + klev*ibl)], zepsilon); + //& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat = fmax(((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*(zqp1env - zfac*zqsice[jl + klon*(jk + klev*ibl)]) / + zcorqsice, (double) 0.0); + } + + //------------------------------------------------------------------- + // Here the supersaturation is turned into liquid water + // However, if the temperature is below the threshold for homogeneous + // freezing then the supersaturation is turned instantly to ice. + //-------------------------------------------------------------------- + + if (zsupsat > zepsec) { + + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zsupsat; + // Include liquid in first guess + zqxfg[0] = zqxfg[0] + zsupsat; + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zsupsat; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + zsupsat; + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*zfaci; + + } + + //------------------------------------------------------- + // 3.1.3 Include supersaturation from previous timestep + // (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + //------------------------------------------------------- + if (psupsat[jl + klon*(jk + klev*ibl)] > zepsec) { + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(0)] = zsolqa[0 + 5*(0)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[0] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add liquid to first guess for deposition term + zqxfg[0] = zqxfg[0] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(1)] = zsolqa[1 + 5*(1)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[1] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*zfaci; + // Store cloud budget diagnostics if required + } + + // on JL + + //--------------------------------------------------------------------- + // 3.2 DETRAINMENT FROM CONVECTION + //--------------------------------------------------------------------- + // * Diagnostic T-ice/liq split retained for convection + // Note: This link is now flexible and a future convection + // scheme can detrain explicit seperate budgets of: + // cloud water, ice, rain and snow + // * There is no (1-ZA) multiplier term on the cloud detrainment + // term, since is now written in mass-flux terms + // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + //--------------------------------------------------------------------- + if (jk + 1 < klev && jk + 1 >= (*yrecldp).ncldtop) { + + + plude[jl + klon*(jk + klev*ibl)] = plude[jl + klon*(jk + klev*ibl)]*zdtgdp; + + if (/*ldcum[jl + klon*ibl] &&*/ plude[jl + klon*(jk + klev*ibl)] > (*yrecldp).rlmin + && plu[jl + klon*(1 + jk + klev*ibl)] > zepsec) { + + zsolac = zsolac + plude[jl + klon*(jk + klev*ibl)] / + plu[jl + klon*(1 + jk + klev*ibl)]; + // *diagnostic temperature split* + zalfaw = zfoealfa[jl + klon*(jk + (klev + 1)*ibl)]; + zconvsrce[0] = zalfaw*plude[jl + klon*(jk + klev*ibl)]; + zconvsrce[1] = ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)]; + zsolqa[0 + 5*(0)] = zsolqa[0 + 5*(0)] + zconvsrce[0]; + zsolqa[1 + 5*(1)] = zsolqa[1 + 5*(1)] + zconvsrce[1]; + + } else { + + plude[jl + klon*(jk + klev*ibl)] = (double) 0.0; + + } + // *convective snow detrainment source + //if (ldcum[jl + klon*ibl]) { + zsolqa[3 + 5*(3)] = zsolqa[3 + 5*(3)] + psnde[jl + klon*(jk + klev*ibl)]*zdtgdp; + //} + + + } + // JK (*yrecldp).ncldtop) { + + zmf = fmax((double) 0.0, (pmfu[jl + klon*(jk + klev*ibl)] + pmfd[jl + klon*(jk + klev*ibl)])*zdtgdp); + zacust = zmf*zanewm1; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlcust[jm] = zmf*zqxnm1[jm]; + // record total flux for enthalpy budget: + zconvsrce[jm] = zconvsrce[jm] + zlcust[jm]; + } + } + + // Now have to work out how much liquid evaporates at arrival point + // since there is no prognostic memory for in-cloud humidity, i.e. + // we always assume cloud is saturated. + + zdtdp = zrdcp*(double) 0.5*(ztp1[jl + klon*(-1 + jk + klev*ibl)] + + ztp1[jl + klon*(jk + klev*ibl)]) / paph[jl + klon*(jk + (klev + 1)*ibl)]; + zdtforc = zdtdp*(pap[jl + klon*(jk + klev*ibl)] - pap[jl + klon*(-1 + jk + klev*ibl)]); + //[#Note: Diagnostic mixed phase should be replaced below] + zdqs = zanewm1*zdtforc*zdqsmixdt; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlfinal = fmax((double) 0.0, zlcust[jm] - zdqs); //lim to zero + // no supersaturation allowed incloud ---V + zevap = fmin((zlcust[jm] - zlfinal), zevaplimmix); + // ZEVAP=0.0_JPRB + zlfinal = zlcust[jm] - zevap; + zlfinalsum = zlfinalsum + zlfinal; // sum + + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zlcust[jm]; // whole sum + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zevap; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zevap; + } + } + + // Reset the cloud contribution if no cloud water survives to this level: + if (zlfinalsum < zepsec) { + zacust = (double) 0.0; + } + zsolac = zsolac + zacust; + + } + // on JK>NCLDTOP + + //--------------------------------------------------------------------- + // Subsidence sink of cloud to the layer below + // (Implicit - re. CFL limit on convective mass flux) + //--------------------------------------------------------------------- + + + if (jk + 1 < klev) { + + zmfdn = fmax((double) 0.0, (pmfu[jl + klon*(1 + jk + klev*ibl)] + + pmfd[jl + klon*(1 + jk + klev*ibl)])*zdtgdp); + + zsolab = zsolab + zmfdn; + zsolqb[0 + 5*(0)] = zsolqb[0 + 5*(0)] + zmfdn; + zsolqb[1 + 5*(1)] = zsolqb[1 + 5*(1)] + zmfdn; + + // Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[0] = zmfdn; + zconvsink[1] = zmfdn; + + } + + + //---------------------------------------------------------------------- + // 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + //---------------------------------------------------------------------- + // NOTE: In default tiedtke scheme this process decreases the cloud + // area but leaves the specific cloud water content + // within clouds unchanged + //---------------------------------------------------------------------- + + // ------------------------------ + // Define turbulent erosion rate + // ------------------------------ + zldifdt = (*yrecldp).rcldiff*ptsphy; //original version + //Increase by factor of 5 for convective points + if (ktype[jl + klon*ibl] > 0 && plude[jl + klon*(jk + klev*ibl)] > zepsec) { + zldifdt = (*yrecldp).rcldiff_convi*zldifdt; + } + + // At the moment, works on mixed RH profile and partitioned ice/liq fraction + // so that it is similar to previous scheme + // Should apply RHw for liquid cloud and RHi for ice cloud separately + if (zli[jl + klon*(jk + klev*ibl)] > zepsec) { + // Calculate environmental humidity + // ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + // & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + // ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt*fmax(zqsmix[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))], (double) 0.0); + zleros = za[jl + klon*(jk + klev*ibl)]*ze; + zleros = fmin(zleros, zevaplimmix); + zleros = fmin(zleros, zli[jl + klon*(jk + klev*ibl)]); + zaeros = zleros / zlicld; //if linear term + + // Erosion is -ve LINEAR in L,A + zsolac = zsolac - zaeros; //linear + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jl + klon*(jk + klev*ibl)]*zleros; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jl + klon*(jk + klev*ibl)]*zleros; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jl + klon*(jk + klev*ibl)]*zleros; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jl + klon*(jk + klev*ibl)]*zleros; + + } + + //---------------------------------------------------------------------- + // 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + //---------------------------------------------------------------------- + // calculate dqs/dt + // Note: For the separate prognostic Qi and Ql, one would ideally use + // Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + // forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + // These would then instantaneous freeze if T<-38C or lead to ice growth + // by deposition in warmer mixed phase clouds. However, since we do + // not have a separate prognostic equation for in-cloud humidity or a + // statistical scheme approach in place, the depositional growth of ice + // in the mixed phase can not be modelled and we resort to supersaturation + // wrt ice instanteously converting to ice over one timestep + // (see Tompkins et al. QJRMS 2007 for details) + // Thus for the initial implementation the diagnostic mixed phase is + // retained for the moment, and the level of approximation noted. + //---------------------------------------------------------------------- + + zdtdp = zrdcp*ztp1[jl + klon*(jk + klev*ibl)] / pap[jl + klon*(jk + klev*ibl)]; + zdpmxdt = zdp*zqtmst; + zmfdn = (double) 0.0; + if (jk + 1 < klev) { + zmfdn = pmfu[jl + klon*(1 + jk + klev*ibl)] + pmfd[jl + klon*(1 + jk + klev*ibl)]; + } + zwtot = pvervel[jl + klon*(jk + klev*ibl)] + + (double) 0.5*rg*(pmfu[jl + klon*(jk + klev*ibl)] + pmfd[jl + klon*(jk + klev*ibl)] + zmfdn); + zwtot = fmin(zdpmxdt, fmax(-zdpmxdt, zwtot)); + zzzdt = phrsw[jl + klon*(jk + klev*ibl)] + phrlw[jl + klon*(jk + klev*ibl)]; + zdtdiab = fmin(zdpmxdt*zdtdp, fmax(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ralfdcp*zldefr; + // Note: ZLDEFR should be set to the difference between the mixed phase functions + // in the convection and cloud scheme, but this is not calculated, so is zero and + // the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab; + zqold = zqsmix[jl + klon*(jk + klev*ibl)]; + ztold = ztp1[jl + klon*(jk + klev*ibl)]; + ztp1[jl + klon*(jk + klev*ibl)] = ztp1[jl + klon*(jk + klev*ibl)] + zdtforc; + ztp1[jl + klon*(jk + klev*ibl)] = fmax(ztp1[jl + klon*(jk + klev*ibl)], (double) 160.0); + llflag = true; + + // Formerly a call to CUADJTQ(..., ICALL=5) + zqp = (double) 1.0 / pap[jl + klon*(jk + klev*ibl)]; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond = (zqsmix[jl + klon*(jk + klev*ibl)] - zqsat) / + ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - + rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4les, 2)) + + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4ies, 2))))); + ztp1[jl + klon*(jk + klev*ibl)] = ztp1[jl + klon*(jk + klev*ibl)] + + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond; + zqsmix[jl + klon*(jk + klev*ibl)] = zqsmix[jl + klon*(jk + klev*ibl)] - zcond; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond1 = (zqsmix[jl + klon*(jk + klev*ibl)] - zqsat) / + ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - + rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4les, 2)) + + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4ies, 2))))); + ztp1[jl + klon*(jk + klev*ibl)] = ztp1[jl + klon*(jk + klev*ibl)] + + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond1; + zqsmix[jl + klon*(jk + klev*ibl)] = zqsmix[jl + klon*(jk + klev*ibl)] - zcond1; + + zdqs = zqsmix[jl + klon*(jk + klev*ibl)] - zqold; + zqsmix[jl + klon*(jk + klev*ibl)] = zqold; + ztp1[jl + klon*(jk + klev*ibl)] = ztold; + + //---------------------------------------------------------------------- + // 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + // ---------------------------------------------------------------------- + // Erosion term is LINEAR in L + // Changed to be uniform distribution in cloud region + + + // Previous function based on DELTA DISTRIBUTION in cloud: + if (zdqs > (double) 0.0) { + // If subsidence evaporation term is turned off, then need to use updated + // liquid and cloud here? + // ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jl + klon*(jk + klev*ibl)]*fmin(zdqs, zlicld); + zlevap = fmin(zlevap, zevaplimmix); + zlevap = fmin(zlevap, fmax(zqsmix[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))], (double) 0.0)); + + // For first guess call + zlevapl = zliqfrac[jl + klon*(jk + klev*ibl)]*zlevap; + zlevapi = zicefrac[jl + klon*(jk + klev*ibl)]*zlevap; + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jl + klon*(jk + klev*ibl)]*zlevap; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jl + klon*(jk + klev*ibl)]*zlevap; + + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jl + klon*(jk + klev*ibl)]*zlevap; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jl + klon*(jk + klev*ibl)]*zlevap; + + } + + + //---------------------------------------------------------------------- + // 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + //---------------------------------------------------------------------- + // (1) Increase of cloud water in existing clouds + if (za[jl + klon*(jk + klev*ibl)] > zepsec && zdqs <= -(*yrecldp).rlmin) { + + zlcond1 = fmax(-zdqs, (double) 0.0); //new limiter + + //old limiter (significantly improves upper tropospheric humidity rms) + if (za[jl + klon*(jk + klev*ibl)] > (double) 0.99) { + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsmix[jl + klon*(jk + klev*ibl)]); + zcdmax = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - zqsmix[jl + klon*(jk + klev*ibl)]) / + ((double) 1.0 + zcor*zqsmix[jl + klon*(jk + klev*ibl)]*((double)(((double) + (fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4les, 2)) + + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))) + *r5alscp)*(1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4ies, 2))))); + } else { + zcdmax = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]*zqsmix[jl + klon*(jk + klev*ibl)]) / + za[jl + klon*(jk + klev*ibl)]; + } + zlcond1 = fmax(fmin(zlcond1, zcdmax), (double) 0.0); + // end old limiter + + zlcond1 = za[jl + klon*(jk + klev*ibl)]*zlcond1; + if (zlcond1 < (*yrecldp).rlmin) { + zlcond1 = (double) 0.0; + } + + //------------------------------------------------------------------------- + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------- + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond1; + zqxfg[0] = zqxfg[0] + zlcond1; + } else { + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond1; + zqxfg[1] = zqxfg[1] + zlcond1; + } + } + + // (2) Generation of new clouds (da/dt>0) + + + if (zdqs <= -(*yrecldp).rlmin && za[jl + klon*(jk + klev*ibl)] < (double) 1.0 - zepsec) { + + //--------------------------- + // Critical relative humidity + //--------------------------- + zrhc = (*yrecldp).ramid; + zsigk = pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]; + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + if (zsigk > (double) 0.8) { + zrhc = (*yrecldp).ramid + ((double) 1.0 - (*yrecldp).ramid)*(pow(((zsigk - (double) 0.8) / + (double) 0.2), 2)); + } + + // Commented out for CY37R1 to reduce humidity in high trop and strat + // ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + // ZBOTT=ZTRPAUS(JL)+0.2_JPRB + // IF(ZSIGK < ZBOTT) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + // ENDIF + + //--------------------------- + // Supersaturation options + //--------------------------- + if ((*yrecldp).nssopt == 0) { + // No scheme + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 1) { + // Tompkins + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - + za[jl + klon*(jk + klev*ibl)]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 2) { + // Lohmann and Karcher + zqe = zqx[jl + klon*(jk + klev*(4 + 5*ibl))]; + } else if ((*yrecldp).nssopt == 3) { + // Gierens + zqe = + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + zli[jl + klon*(jk + klev*ibl)]; + } + + if (ztp1[jl + klon*(jk + klev*ibl)] >= rtt || (*yrecldp).nssopt == 0) { + // No ice supersaturation allowed + zfac = (double) 1.0; + } else { + // Ice supersaturation + zfac = zfokoop; + } + + if (zqe >= zrhc*zqsice[jl + klon*(jk + klev*ibl)]*zfac && + zqe < zqsice[jl + klon*(jk + klev*ibl)]*zfac) { + // note: not **2 on 1-a term if ZQE is used. + // Added correction term ZFAC to numerator 15/03/2010 + zacond = -((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*zfac*zdqs / + fmax((double) 2.0*(zfac*zqsice[jl + klon*(jk + klev*ibl)] - zqe), zepsec); + + zacond = fmin(zacond, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); //PUT THE LIMITER BACK + + // Linear term: + // Added correction term ZFAC 15/03/2010 + zlcond2 = -zfac*zdqs*(double) 0.5*zacond; //mine linear + + // new limiter formulation + zzdl = (double) 2.0*(zfac*zqsice[jl + klon*(jk + klev*ibl)] - zqe) / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + // Added correction term ZFAC 15/03/2010 + if (zfac*zdqs < -zzdl) { + // ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = (za[jl + klon*(jk + klev*ibl)] - (double) 1.0)*zfac*zdqs - zfac* + zqsice[jl + klon*(jk + klev*ibl)] + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]; + zlcond2 = fmin(zlcond2, zlcondlim); + } + zlcond2 = fmax(zlcond2, (double) 0.0); + + if (zlcond2 < (*yrecldp).rlmin || ((double) 1.0 - + za[jl + klon*(jk + klev*ibl)]) < zepsec) { + zlcond2 = (double) 0.0; + zacond = (double) 0.0; + } + if (zlcond2 == (double) 0.0) { + zacond = (double) 0.0; + } + + // Large-scale generation is LINEAR in A and LINEAR in L + zsolac = zsolac + zacond; //linear + + //------------------------------------------------------------------------ + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------ + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond2; + zqxfg[0] = zqxfg[0] + zlcond2; + } else { + // homogeneous freezing + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond2; + zqxfg[1] = zqxfg[1] + zlcond2; + } + + } + } + + //---------------------------------------------------------------------- + // 3.7 Growth of ice by vapour deposition + //---------------------------------------------------------------------- + // Following Rotstayn et al. 2001: + // does not use the ice nuclei number from cloudaer.F90 + // but rather a simple Meyers et al. 1992 form based on the + // supersaturation and assuming clouds are saturated with + // respect to liquid water (well mixed), (or Koop adjustment) + // Growth considered as sink of liquid water if present so + // Bergeron-Findeisen adjustment in autoconversion term no longer needed + //---------------------------------------------------------------------- + + //-------------------------------------------------------- + //- + //- Ice deposition following Rotstayn et al. (2001) + //- (monodisperse ice particle size distribution) + //- + //-------------------------------------------------------- + if (idepice == 1) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[jl + klon*(-1 + jk + klev*ibl)] < (*yrecldp).rcldtopcf && + za[jl + klon*(jk + klev*ibl)] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jl + klon*(jk + klev*ibl)] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq - (double) 0.639); + + //------------------------------------------------ + // 2.4e-2 is conductivity of air + // 8.8 = 700**1/3 = density of ice to the third + //------------------------------------------------ + zadd = rlstt*(rlstt / (rv*ztp1[jl + klon*(jk + klev*ibl)]) - (double) 1.0) / + ((double) 2.4E-2*ztp1[jl + klon*(jk + klev*ibl)]); + zbdd = rv*ztp1[jl + klon*(jk + klev*ibl)]*pap[jl + klon*(jk + klev*ibl)] / + ((double) 2.21*zvpice); + zcvds = (double) 7.8*(pow((zicenuclei / zrho), (double) 0.666))*(zvpliq - + zvpice) / ((double) 8.87*(zadd + zbdd)*zvpice); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + //------------------ + // new value of ice: + //------------------ + zinew = pow(((double) 0.666*zcvds*ptsphy + (pow(zice0, (double) 0.666))), (double) 1.5); + + //--------------------------- + // grid-mean deposition rate: + //--------------------------- + zdepos = fmax(za[jl + klon*(jk + klev*ibl)]*(zinew - zice0), (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp).rdepliqrefrate + + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + + } + + //-------------------------------------------------------- + //- + //- Ice deposition assuming ice PSD + //- + //-------------------------------------------------------- + } else if (idepice == 2) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[jl + klon*(-1 + jk + klev*ibl)] < (*yrecldp).rcldtopcf && + za[jl + klon*(jk + klev*ibl)] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jl + klon*(jk + klev*ibl)] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq - (double) 0.639); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + // Particle size distribution + ztcg = (double) 1.0; + zfacx1i = (double) 1.0; + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice* + ztp1[jl + klon*(jk + klev*ibl)] + pap[jl + klon*(jk + klev*ibl)]* + (*yrecldp).rcl_apb3*(pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 3.)); + zcorrfac = pow(((double) 1.0 / zrho), (double) 0.5); + zcorrfac2 = (pow((ztp1[jl + klon*(jk + klev*ibl)] / (double) 273.0), (double)1.5))* + ((double) 393.0 / (ztp1[jl + klon*(jk + klev*ibl)] + (double) 120.0)); + + zpr02 = zrho*zice0*(*yrecldp).rcl_const1i / (ztcg*zfacx1i); + + zterm1 = (zvpliq - zvpice)*(pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 2.0))* + zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2i*zfacx1i / (zrho*zaplusb*zvpice); + zterm2 = (double) 0.65*(*yrecldp).rcl_const6i*(pow(zpr02, (*yrecldp).rcl_const4i)) + + (*yrecldp).rcl_const3i*(pow(zcorrfac, (double) 0.5))*(pow(zrho, (double) 0.5))* + (pow(zpr02, (*yrecldp).rcl_const5i)) / (pow(zcorrfac2, (double) 0.5)); + + zdepos = fmax(za[jl + klon*(jk + klev*ibl)]*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)* + ((*yrecldp).rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + } + + } + // on IDEPICE + + //###################################################################### + // 4 *** PRECIPITATION PROCESSES *** + //###################################################################### + + //---------------------------------- + // revise in-cloud consensate amount + //---------------------------------- + ztmpa = (double) 1.0 / fmax(za[jl + klon*(jk + klev*ibl)], zepsec); + zliqcld = zqxfg[0]*ztmpa; + zicecld = zqxfg[1]*ztmpa; + zlicld = zliqcld + zicecld; + + //---------------------------------------------------------------------- + // 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + // now that rain, snow, graupel species are prognostic + // the precipitation flux can be defined directly level by level + // There is no vertical memory required from the flux variable + //---------------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm] || jm + 1 == 2) { + //------------------------ + // source from layer above + //------------------------ + if (jk + 1 > (*yrecldp).ncldtop) { + zfallsrce[jm] = zpfplsx[jl + klon*(jk + (klev + 1)*(jm + 5*ibl))]*zdtgdp; + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zfallsrce[jm]; + zqxfg[jm] = zqxfg[jm] + zfallsrce[jm]; + // use first guess precip----------V + zqpretot = zqpretot + zqxfg[jm]; + } + //------------------------------------------------- + // sink to next layer, constant fall speed + //------------------------------------------------- + // if aerosol effect then override + // note that for T>233K this is the same as above. + if ((*yrecldp).laericesed && jm + 1 == 2) { + zre_ice = pre_ice[jl + klon*(jk + klev*ibl)]; + // The exponent value is from + // Morrison et al. JAS 2005 Appendix + zvqx[1] = (double) 0.002*(pow(zre_ice, (double) 1.0)); + } + zfall = zvqx[jm]*zrho; + //------------------------------------------------- + // modified by Heymsfield and Iaquinta JAS 2000 + //------------------------------------------------- + // ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + // &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm] = zdtgdp*zfall; + // Cloud budget diagnostic stored at end as implicit + // jl + } + // LLFALL + } + // jm + + //--------------------------------------------------------------- + // Precip cover overlap using MAX-RAN Overlap + // Since precipitation is now prognostic we must + // 1) apply an arbitrary minimum coverage (0.3) if precip>0 + // 2) abandon the 2-flux clr/cld treatment + // 3) Thus, since we have no memory of the clear sky precip + // fraction, we mimic the previous method by reducing + // ZCOVPTOT(JL), which has the memory, proportionally with + // the precip evaporation rate, taking cloud fraction + // into account + // #3 above leads to much smoother vertical profiles of + // precipitation fraction than the Klein-Jakob scheme which + // monotonically increases precip fraction and then resets + // it to zero in a step function once clear-sky precip reaches + // zero. + //--------------------------------------------------------------- + if (zqpretot > zepsec) { + zcovptot = (double) 1.0 - (((double) 1.0 - zcovptot)*((double) 1.0 - + fmax(za[jl + klon*(jk + klev*ibl)], za[jl + klon*(-1 + jk + klev*ibl)])) / + ((double) 1.0 - fmin(za[jl + klon*(-1 + jk + klev*ibl)], (double) 1.0 - (double) 1.E-06))); + zcovptot = fmax(zcovptot, (*yrecldp).rcovpmin); + zcovpclr = fmax((double) 0.0, zcovptot - za[jl + klon*(jk + klev*ibl)]); // clear sky proportion + zraincld = zqxfg[2] / zcovptot; + zsnowcld = zqxfg[3] / zcovptot; + zcovpmax = fmax(zcovptot, zcovpmax); + } else { + zraincld = (double) 0.0; + zsnowcld = (double) 0.0; + zcovptot = (double) 0.0; // no flux - reset cover + zcovpclr = (double) 0.0; // reset clear sky proportion + zcovpmax = (double) 0.0; // reset max cover for ZZRH calc + } + + //---------------------------------------------------------------------- + // 4.3a AUTOCONVERSION TO SNOW + //---------------------------------------------------------------------- + + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt) { + //----------------------------------------------------- + // Snow Autoconversion rate follow Lin et al. 1983 + //----------------------------------------------------- + if (zicecld > zepsec) { + + zzco = ptsphy*(*yrecldp).rsnowlin1*exp((*yrecldp).rsnowlin2* + (ztp1[jl + klon*(jk + klev*ibl)] - rtt)); + + if ((*yrecldp).laericeauto) { + zlcrit = picrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=0.027 + zzco = zzco*(pow(((*yrecldp).rnice / pnice[jl + klon*(jk + klev*ibl)]), (double) 0.333)); + } else { + zlcrit = (*yrecldp).rlcritsnow; + } + + zsnowaut = zzco*((double) 1.0 - exp(-(pow((zicecld / zlcrit), 2)))); + zsolqb[3 + 5*(1)] = zsolqb[3 + 5*(1)] + zsnowaut; + + } + } + + //---------------------------------------------------------------------- + // 4.3b AUTOCONVERSION WARM CLOUDS + // Collection and accretion will require separate treatment + // but for now we keep this simple treatment + //---------------------------------------------------------------------- + + if (zliqcld > zepsec) { + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Sundqvist (1989) + //- + //-------------------------------------------------------- + if (iwarmrain == 1) { + + zzco = (*yrecldp).rkconv*ptsphy; + + if ((*yrecldp).laerliqautolsp) { + zlcrit = plcrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), (double) 0.333)); + } else { + // Modify autoconversion threshold dependent on: + // land (polluted, high CCN, smaller droplets, higher threshold) + // sea (clean, low CCN, larger droplets, lower threshold) + if (plsm[jl + klon*ibl] > (double) 0.5) { + zlcrit = (*yrecldp).rclcrit_land; // land + } else { + zlcrit = (*yrecldp).rclcrit_sea; // ocean + } + } + + //------------------------------------------------------------------ + // Parameters for cloud collection by rain and snow. + // Note that with new prognostic variable it is now possible + // to REPLACE this with an explicit collection parametrization + //------------------------------------------------------------------ + zprecip = (zpfplsx[jl + klon*(jk + (klev + 1)*(3 + 5*ibl))] + + zpfplsx[jl + klon*(jk + (klev + 1)*(2 + 5*ibl))]) / fmax(zepsec, zcovptot); + zcfpr = (double) 1.0 + (*yrecldp).rprc1*sqrt(fmax(zprecip, (double) 0.0)); + // ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + // &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if ((*yrecldp).laerliqcoll) { + // 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), (double) 0.333)); + } + + zzco = zzco*zcfpr; + zlcrit = zlcrit / fmax(zcfpr, zepsec); + + if (zliqcld / zlcrit < (double) 20.0) { + // Security for exp for some compilers + zrainaut = zzco*((double) 1.0 - exp(-(pow((zliqcld / zlcrit), 2)))); + } else { + zrainaut = zzco; + } + + // rain freezes instantly + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt) { + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zrainaut; + } else { + zsolqb[2 + 5*(0)] = zsolqb[2 + 5*(0)] + zrainaut; + } + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Khairoutdinov and Kogan (2000) + //- + //-------------------------------------------------------- + } else if (iwarmrain == 2) { + + if (plsm[jl + klon*ibl] > (double) 0.5) { + // land + zconst = (*yrecldp).rcl_kk_cloud_num_land; + zlcrit = (*yrecldp).rclcrit_land; + } else { + // ocean + zconst = (*yrecldp).rcl_kk_cloud_num_sea; + zlcrit = (*yrecldp).rclcrit_sea; + } + + if (zliqcld > zlcrit) { + + zrainaut = (double) 1.5*za[jl + klon*(jk + klev*ibl)]*ptsphy* + (*yrecldp).rcl_kkaau*(pow(zliqcld, (*yrecldp).rcl_kkbauq))* + (pow(zconst, (*yrecldp).rcl_kkbaun)); + + zrainaut = fmin(zrainaut, zqxfg[0]); + if (zrainaut < zepsec) { + zrainaut = (double) 0.0; + } + + zrainacc = (double) 2.0*za[jl + klon*(jk + klev*ibl)]*ptsphy*(*yrecldp).rcl_kkaac* + (pow((zliqcld*zraincld), (*yrecldp).rcl_kkbac)); + + zrainacc = fmin(zrainacc, zqxfg[0]); + if (zrainacc < zepsec) { + zrainacc = (double) 0.0; + } + + } else { + zrainaut = (double) 0.0; + zrainacc = (double) 0.0; + } + + // If temperature < 0, then autoconversion produces snow rather than rain + // Explicit + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt) { + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainaut; + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainaut; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainacc; + } else { + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainaut; + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainaut; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainacc; + } + + } + // on IWARMRAIN + + } + // on ZLIQCLD > ZEPSEC + + + //---------------------------------------------------------------------- + // RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + // only active if T<0degC and supercooled liquid water is present + // AND if not Sundquist autoconversion (as this includes riming) + //---------------------------------------------------------------------- + if (iwarmrain > 1) { + + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt && zliqcld > zepsec) { + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), (double) 0.4); + + //------------------------------------------------------------------ + // Riming of snow by cloud water - implicit in lwc + //------------------------------------------------------------------ + if (zsnowcld > zepsec && zcovptot > (double) 0.01) { + + // Calculate riming term + // Factor of liq water taken out because implicit + zsnowrime = (double) 0.3*zcovptot*ptsphy*(*yrecldp).rcl_const7s* + zfallcorr*(pow((zrho*zsnowcld*(*yrecldp).rcl_const1s), (*yrecldp).rcl_const8s)); + + // Limit snow riming term + zsnowrime = fmin(zsnowrime, (double) 1.0); + + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zsnowrime; + + } + + //------------------------------------------------------------------ + // Riming of ice by cloud water - implicit in lwc + // NOT YET ACTIVE + //------------------------------------------------------------------ + // IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + // + // ! Calculate riming term + // ! Factor of liq water taken out because implicit + // ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + // & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + // + // ! Limit ice riming term + // ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + // + // ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + // + // ENDIF + } + + } + // on IWARMRAIN > 1 + + + //---------------------------------------------------------------------- + // 4.4a MELTING OF SNOW and ICE + // with new implicit solver this also has to treat snow or ice + // precipitating from the level above... i.e. local ice AND flux. + // in situ ice and snow: could arise from LS advection or warming + // falling ice and snow: arrives by precipitation process + //---------------------------------------------------------------------- + + zicetot = zqxfg[1] + zqxfg[3]; + zmeltmax = (double) 0.0; + + // If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if (zicetot > zepsec && ztp1[jl + klon*(jk + klev*ibl)] > rtt) { + + // Calculate subsaturation + zsubsat = fmax(zqsice[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))], (double) 0.0); + + // Calculate difference between dry-bulb (ZTP1) and the temperature + // at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + // Melting only occurs if the wet-bulb temperature >0 + // i.e. warming of ice particle due to melting > cooling + // due to evaporation. + ztdmtw0 = ztp1[jl + klon*(jk + klev*ibl)] - rtt - zsubsat*(ztw1 + ztw2*(pap[jl + klon*(jk + klev*ibl)] - ztw3) - + ztw4*(ztp1[jl + klon*(jk + klev*ibl)] - ztw5)); + // Not implicit yet... + // Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*ztdmtw0) / (*yrecldp).rtaumel); + zmeltmax = fmax(ztdmtw0*zcons1*zrldcp, (double) 0.0); + } + + // Loop over frozen hydrometeors (ice, snow) + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (iphase[jm] == 2) { + jn = imelt[jm]; + if (zmeltmax > zepsec && zicetot > zepsec) { + // Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm] / zicetot; + zmelt = fmin(zqxfg[jm], zalfa*zmeltmax); + // needed in first guess + // This implies that zqpretot has to be recalculated below + // since is not conserved here if ice falls and liquid doesn't + zqxfg[jm] = zqxfg[jm] - zmelt; + zqxfg[-1 + jn] = zqxfg[-1 + jn] + zmelt; + zsolqa[-1 + jn + 5*jm] = zsolqa[-1 + jn + 5*jm] + zmelt; + zsolqa[jm + 5*(-1 + jn)] = zsolqa[jm + 5*(-1 + jn)] - zmelt; + } + } + } + + //---------------------------------------------------------------------- + // 4.4b FREEZING of RAIN + //---------------------------------------------------------------------- + + // If rain present + if (zqx[jl + klon*(jk + klev*(2 + 5*ibl))] > zepsec) { + + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt && ztp1[jl + klon*(-1 + jk + klev*ibl)] > rtt) { + // Base of melting layer/top of refreezing layer so + // store rain/snow fraction for precip type diagnosis + // If mostly rain, then supercooled rain slow to freeze + // otherwise faster to freeze (snow or ice pellets) + zqpretot = fmax(zqx[jl + klon*(jk + klev*(3 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(2 + 5*ibl))], zepsec); + prainfrac_toprfz[jl + klon*ibl] = zqx[jl + klon*(jk + klev*(2 + 5*ibl))] / zqpretot; + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + llrainliq = true; + } else { + llrainliq = false; + } + } + + // If temperature less than zero + if (ztp1[jl + klon*(jk + klev*ibl)] < rtt) { + + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + + // Majority of raindrops completely melted + // Refreezing is by slow heterogeneous freezing + + // Slope of rain particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zqx[jl + klon*(jk + klev*(2 +5*ibl))])), + (*yrecldp).rcl_fac2); + + // Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = (*yrecldp).rcl_fzrab*(ztp1[jl + klon*(jk + klev*ibl)] - rtt); + zfrz = ptsphy*((*yrecldp).rcl_const5r / zrho)*(exp(ztemp) - + (double) 1.)*(pow(zlambda, (*yrecldp).rcl_const6r)); + zfrzmax = fmax(zfrz, (double) 0.0); + + } else { + + // Majority of raindrops only partially melted + // Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*(rtt - + ztp1[jl + klon*(jk + klev*ibl)])) / (*yrecldp).rtaumel); + zfrzmax = fmax((rtt - ztp1[jl + klon*(jk + klev*ibl)])*zcons1*zrldcp, (double) 0.0); + + } + + if (zfrzmax > zepsec) { + zfrz = fmin(zqx[jl + klon*(jk + klev*(2 + 5*ibl))], zfrzmax); + zsolqa[3 + 5*(2)] = zsolqa[3 + 5*(2)] + zfrz; + zsolqa[2 + 5*(3)] = zsolqa[2 + 5*(3)] - zfrz; + } + } + + } + + + //---------------------------------------------------------------------- + // 4.4c FREEZING of LIQUID + //---------------------------------------------------------------------- + // not implicit yet... + zfrzmax = fmax(((*yrecldp).rthomo - ztp1[jl + klon*(jk + klev*ibl)])*zrldcp, (double) 0.0); + + jm = 1; + jn = imelt[-1 + jm]; + if (zfrzmax > zepsec && zqxfg[-1 + jm] > zepsec) { + zfrz = fmin(zqxfg[-1 + jm], zfrzmax); + zsolqa[-1 + jn + 5*(-1 + jm)] = zsolqa[-1 + jn + 5*(-1 + jm)] + zfrz; + zsolqa[-1 + jm + 5*(-1 + jn)] = zsolqa[-1 + jm + 5*(-1 + jn)] - zfrz; + } + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF RAIN/SNOW + //---------------------------------------------------------------------- + + //---------------------------------------- + // Rain evaporation scheme from Sundquist + //---------------------------------------- + if (ievaprain == 1) { + + // Rain + + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsliq[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsliq[jl + klon*(jk + klev*ibl)])); + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + // note: zpreclr is a rain flux + zpreclr = zqxfg[2]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]) / + (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(double) 0.5*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsliq; + zdpr = zcovpclr*zbeta*(zqsliq[jl + klon*(jk + klev*ibl)] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Evaporate rain + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jl + klon*(jk + klev*ibl)])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + + //--------------------------------------------------------- + // Rain evaporation scheme based on Abel and Boutle (2013) + //--------------------------------------------------------- + } else if (ievaprain == 2) { + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for rain evaporation + // to avoid cloud formation and saturation of the grid box + //----------------------------------------------------------------------- + // Limit RH for rain evaporation dependent on precipitation fraction + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + // Critical relative humidity + //ZRHC=RAMID + //ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + //IF(ZSIGK > 0.8_JPRB) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + //ENDIF + //ZZRH = MIN(ZRHC,ZZRH) + + // Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = fmin((double) 0.8, zzrh); + + zqe = fmax((double) 0.0, fmin(zqx[jl + klon*(jk + klev*(4 + 5*ibl))], + zqsliq[jl + klon*(jk + klev*ibl)])); + + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && + zqe < zzrh*zqsliq[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + + //------------------------------------------- + // Abel and Boutle (2012) evaporation + //------------------------------------------- + // Calculate local precipitation (kg/kg) + zpreclr = zqxfg[2] / zcovptot; + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), 0.4); + + // Saturation vapour pressure with respect to liquid phase + zesatliq = rv / rd*((double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))); + + // Slope of particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zpreclr)), (*yrecldp).rcl_fac2); // ZPRECLR=kg/kg + + zevap_denom = (*yrecldp).rcl_cdenom1*zesatliq - (*yrecldp).rcl_cdenom2* + ztp1[jl + klon*(jk + klev*ibl)]*zesatliq + (*yrecldp).rcl_cdenom3* + (pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 3.))*pap[jl + klon*(jk + klev*ibl)]; + + // Temperature dependent conductivity + zcorr2 = (pow((ztp1[jl + klon*(jk + klev*ibl)] / (double) 273.), (double) 1.5))*(double) 393. / + (ztp1[jl + klon*(jk + klev*ibl)] + (double) 120.); + zka = (*yrecldp).rcl_ka273*zcorr2; + + zsubsat = fmax(zzrh*zqsliq[jl + klon*(jk + klev*ibl)] - zqe, (double) 0.0); + + zbeta = ((double) 0.5 / zqsliq[jl + klon*(jk + klev*ibl)])* + (pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 2.))*zesatliq*(*yrecldp).rcl_const1r*(zcorr2 / + zevap_denom)*((double) 0.78 / (pow(zlambda, (*yrecldp).rcl_const4r)) + + (*yrecldp).rcl_const2r*(pow((zrho*zfallcorr), (double) 0.5)) / ((pow(zcorr2, + (double) 0.5))*(pow(zlambda, (*yrecldp).rcl_const3r)))); + + zdenom = (double) 1.0 + zbeta*ptsphy; //*ZCORQSLIQ(JL) + zdpevap = zcovpclr*zbeta*ptsphy*zsubsat / zdenom; + + //--------------------------------------------------------- + // Add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Limit rain evaporation + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jl + klon*(jk + klev*ibl)])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + } + // on IEVAPRAIN + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF SNOW + //---------------------------------------------------------------------- + // Snow + if (ievapsnow == 1) { + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice[jl + klon*(jk + klev*ibl)])); + llo1 = zcovpclr > zepsec && zqxfg[3] > zepsec && + zqe < zzrh*zqsice[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + // note: zpreclr is a rain flux a + zpreclr = zqxfg[3]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]) / + (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsice; + zdpr = zcovpclr*zbeta*(zqsice[jl + klon*(jk + klev*ibl)] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce snow to zero and model + // produces small amounts of snowfall everywhere. + //--------------------------------------------------------- + + // Evaporate snow + zevap = fmin(zdpevap, zqxfg[3]); + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jl + klon*(jk + klev*ibl)])*zevap / zqxfg[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + //--------------------------------------------------------- + } else if (ievapsnow == 2) { + + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for snow evaporation + //----------------------------------------------------------------------- + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice[jl + klon*(jk + klev*ibl)])); + llo1 = zcovpclr > zepsec && zqx[jl + klon*(jk + klev*(3 + 5*ibl))] > zepsec + && zqe < zzrh*zqsice[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + + // Calculate local precipitation (kg/kg) + zpreclr = zqx[jl + klon*(jk + klev*(3 + 5*ibl))] / zcovptot; + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))*rv / rd; + + // Particle size distribution + // ZTCG increases Ni with colder temperatures - essentially a + // Fletcher or Meyers scheme? + ztcg = (double) 1.0; //v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + // ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = (double) 1.0; //v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice* + ztp1[jl + klon*(jk + klev*ibl)] + pap[jl + klon*(jk + klev*ibl)]*(*yrecldp).rcl_apb3* + (pow(ztp1[jl + klon*(jk + klev*ibl)], 3)); + zcorrfac = pow((1.0 / zrho), 0.5); + zcorrfac2 = (pow((ztp1[jl + klon*(jk + klev*ibl)] / 273.0), 1.5))*(393.0 / (ztp1[jl + klon*(jk + klev*ibl)] + 120.0)); + + zpr02 = zrho*zpreclr*(*yrecldp).rcl_const1s / (ztcg*zfacx1s); + + zterm1 = (zqsice[jl + klon*(jk + klev*ibl)] - zqe)*(pow(ztp1[jl + klon*(jk + klev*ibl)], 2))* + zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2s*zfacx1s / (zrho*zaplusb*zqsice[jl + klon*(jk + klev*ibl)]); + zterm2 = 0.65*(*yrecldp).rcl_const6s*(pow(zpr02, (*yrecldp).rcl_const4s)) + + (*yrecldp).rcl_const3s*(pow(zcorrfac, 0.5))*(pow(zrho, 0.5))*(pow(zpr02, + (*yrecldp).rcl_const5s)) / (pow(zcorrfac2, 0.5)); + + zdpevap = fmax(zcovpclr*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit evaporation to snow amount + //-------------------------------------------------------------------- + zevap = fmin(zdpevap, zevaplimice); + zevap = fmin(zevap, zqx[jl + klon*(jk + klev*(3 + 5*ibl))]); + + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jl + klon*(jk + klev*ibl)])*zevap / zqx[jl + klon*(jk + klev*(3 + 5*ibl))])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + + } + // on IEVAPSNOW + + //-------------------------------------- + // Evaporate small precipitation amounts + //-------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm]) { + if (zqxfg[jm] < (*yrecldp).rlmin) { + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zqxfg[jm]; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zqxfg[jm]; + } + } + } + + //###################################################################### + // 5.0 *** SOLVERS FOR A AND L *** + // now use an implicit solution rather than exact solution + // solver is forward in time, upstream difference for advection + //###################################################################### + + //--------------------------- + // 5.1 solver for cloud cover + //--------------------------- + zanew = (za[jl + klon*(jk + klev*ibl)] + zsolac) / ((double) 1.0 + zsolab); + zanew = fmin(zanew, (double) 1.0); + if (zanew < (*yrecldp).ramin) { + zanew = (double) 0.0; + } + zda = zanew - zaorig[jl + klon*(jk + klev*ibl)]; + //--------------------------------- + // variables needed for next level + //--------------------------------- + zanewm1 = zanew; + + //-------------------------------- + // 5.2 solver for the microphysics + //-------------------------------- + + //-------------------------------------------------------------- + // Truncate explicit sinks to avoid negatives + // Note: Species are treated in the order in which they run out + // since the clipping will alter the balance for the other vars + //-------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + llindex3[jn + 5*jm] = false; + } + zsinksum[jm] = (double) 0.0; + } + + //---------------------------- + // collect sink terms and mark + //---------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsinksum[jm] = zsinksum[jm] - zsolqa[jm + 5*jn]; // +ve total is bad + } + } + + //--------------------------------------- + // calculate overshoot and scaling factor + //--------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zmax = fmax(zqx[jl + klon*(jk + klev*(jm + 5*ibl))], zepsec); + zrat = fmax(zsinksum[jm], zmax); + zratio[jm] = zmax / zrat; + } + + //-------------------------------------------- + // scale the sink terms, in the correct order, + // recalculating the scale factor each time + //-------------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zsinksum[jm] = (double) 0.0; + } + + //---------------- + // recalculate sum + //---------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + psum_solqa = 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + psum_solqa = psum_solqa + zsolqa[jm + 5*jn]; + } + // ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm] = zsinksum[jm] - psum_solqa; + //--------------------------- + // recalculate scaling factor + //--------------------------- + zmm = fmax(zqx[jl + klon*(jk + klev*(jm + 5*ibl))], zepsec); + zrr = fmax(zsinksum[jm], zmm); + zratio[jm] = zmm / zrr; + //------ + // scale + //------ + zzratio = zratio[jm]; + //DIR$ IVDEP + //DIR$ PREFERVECTOR + for (jn = 0; jn <= 5 + -1; jn += 1) { + if (zsolqa[jm + 5*jn] < (double) 0.0) { + zsolqa[jm + 5*jn] = zsolqa[jm + 5*jn]*zzratio; + zsolqa[jn + 5*jm] = zsolqa[jn + 5*jm]*zzratio; + } + } + } + + //-------------------------------------------------------------- + // 5.2.2 Solver + //------------------------ + + //------------------------ + // set the LHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + //---------------------------------------------- + // diagonals: microphysical sink terms+transport + //---------------------------------------------- + if (jn + 1 == jm + 1) { + zqlhs[jn + 5*jm] = (double) 1.0 + zfallsink[jm]; + for (jo = 0; jo <= 5 + -1; jo += 1) { + zqlhs[jn + 5*jm] = zqlhs[jn + 5*jm] + zsolqb[jo + 5*jn]; + } + //------------------------------------------ + // non-diagonals: microphysical source terms + //------------------------------------------ + } else { + zqlhs[jn + 5*jm] = -zsolqb[jn + 5*jm]; // here is the delta T - missing from doc. + } + } + } + + //------------------------ + // set the RHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + //--------------------------------- + // sum the explicit source and sink + //--------------------------------- + zexplicit = (double) 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + zexplicit = zexplicit + zsolqa[jm + 5*jn]; // sum over middle index + } + zqxn[jm] = zqx[jl + klon*(jk + klev*(jm + 5*ibl))] + zexplicit; + } + + //----------------------------------- + // *** solve by LU decomposition: *** + //----------------------------------- + + // Note: This fast way of solving NCLVxNCLV system + // assumes a good behaviour (i.e. non-zero diagonal + // terms with comparable orders) of the matrix stored + // in ZQLHS. For the moment this is the case but + // be aware to preserve it when doing eventual + // modifications. + + // Non pivoting recursive factorization + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + // number of steps + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + // row index + zqlhs[jm + 5*jn] = zqlhs[jm + 5*jn] / zqlhs[jn + 5*jn]; + for (ik = jn + 1; ik <= 5 + -1; ik += 1) { + // column index + zqlhs[jm + 5*ik] = zqlhs[jm + 5*ik] - zqlhs[jm + 5*jn]*zqlhs[jn + 5*ik]; + } + } + } + + // Backsubstitution + // step 1 + for (jn = 1; jn <= 5 + -1; jn += 1) { + for (jm = 0; jm <= jn + 1 - 1 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + } + // step 2 + zqxn[4] = zqxn[4] / zqlhs[4 + 5*(4)]; + for (jn = -2 + 5; jn >= 1 + -1; jn += -1) { + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + zqxn[jn] = zqxn[jn] / zqlhs[jn + 5*jn]; + } + + // Ensure no small values (including negatives) remain in cloud variables nor + // precipitation rates. + // Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + if (zqxn[jn] < zepsec) { + zqxn[4] = zqxn[4] + zqxn[jn]; + zqxn[jn] = (double) 0.0; + } + } + + //-------------------------------- + // variables needed for next level + //-------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxnm1[jm] = zqxn[jm]; + zqxn2d[jl + klon*(jk + klev*(jm + 5*ibl))] = zqxn[jm]; + } + + //------------------------------------------------------------------------ + // 5.3 Precipitation/sedimentation fluxes to next level + // diagnostic precipitation fluxes + // It is this scaled flux that must be used for source to next layer + //------------------------------------------------------------------------ + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[jl + klon*(1 + jk + (klev + 1)*(jm + 5*ibl))] = + zfallsink[jm]*zqxn[jm]*zrdtgdp; + } + + // Ensure precipitation fraction is zero if no precipitation + zqpretot = zpfplsx[jl + klon*(1 + jk + (klev + 1)*(3 + 5*ibl))] + + zpfplsx[jl + klon*(1 + jk + (klev + 1)*(2 + 5*ibl))]; + if (zqpretot < zepsec) { + zcovptot = (double) 0.0; + } + + //###################################################################### + // 6 *** UPDATE TENDANCIES *** + //###################################################################### + + //-------------------------------- + // 6.1 Temperature and CLV budgets + //-------------------------------- + + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + + // calculate fluxes in and out of box for conservation of TL + zfluxq[jm] = zpsupsatsrce[jm] + zconvsrce[jm] + zfallsrce[jm] - + (zfallsink[jm] + zconvsink[jm])*zqxn[jm]; + + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + klon*(jk + klev*ibl)] + + ralvdcp*(zqxn[jm] - zqx[jl + klon*(jk + klev*(jm + 5*ibl))] - zfluxq[jm])*zqtmst; + } + + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + klon*(jk + klev*ibl)] + + ralsdcp*(zqxn[jm] - zqx[jl + klon*(jk + klev*(jm + 5*ibl))] - zfluxq[jm])*zqtmst; + } + + //---------------------------------------------------------------------- + // New prognostic tendencies - ice,liquid rain,snow + // Note: CLV arrays use PCLV in calculation of tendency while humidity + // uses ZQX. This is due to clipping at start of cloudsc which + // include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + //---------------------------------------------------------------------- + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] + + (zqxn[jm] - zqx0[jl + klon*(jk + klev*(jm + 5*ibl))])*zqtmst; + + } + + //---------------------- + // 6.2 Humidity budget + //---------------------- + tendency_loc_q[jl + klon*(jk + klev*ibl)] = tendency_loc_q[jl + klon*(jk + klev*ibl)] + + (zqxn[4] - zqx[jl + klon*(jk + klev*(4 + 5*ibl))])*zqtmst; + + //------------------- + // 6.3 cloud cover + //----------------------- + tendency_loc_a[jl + klon*(jk + klev*ibl)] = tendency_loc_a[jl + klon*(jk + klev*ibl)] + zda*zqtmst; + + //-------------------------------------------------- + // Copy precipitation fraction into output variable + //------------------------------------------------- + pcovptot[jl + klon*(jk + klev*ibl)] = zcovptot; + + } + // on vertical level JK + //---------------------------------------------------------------------- + // END OF VERTICAL LOOP + //---------------------------------------------------------------------- + + //###################################################################### + // 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + //###################################################################### + + //-------------------------------------------------------------------- + // Copy general precip arrays back into PFP arrays for GRIB archiving + // Add rain and liquid fluxes, ice and snow fluxes + //-------------------------------------------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfplsl[jl + klon*(jk + (klev + 1)*ibl)] = zpfplsx[jl + klon*(jk + (klev + 1)*(2 + 5*ibl))] + + zpfplsx[jl + klon*(jk + (klev + 1)*(0 + 5*ibl))]; + pfplsn[jl + klon*(jk + (klev + 1)*ibl)] = zpfplsx[jl + klon*(jk + (klev + 1)*(3 + 5*ibl))] + + zpfplsx[jl + klon*(jk + (klev + 1)*(1 + 5*ibl))]; + } + + //-------- + // Fluxes: + //-------- + pfsqlf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqif[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqrf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqsf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqlng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqnng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqrng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //rain + pfcqsng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //snow + // fluxes due to turbulence + pfsqltur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqitur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + + for (jk = 0; jk <= klev + -1; jk += 1) { + + zgdph_r = -zrg_r*(paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - + paph[jl + klon*(jk + (klev + 1)*ibl)])*zqtmst; + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqltur[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqitur[jl + klon*(jk + (klev + 1)*ibl)]; + + zalfaw = zfoealfa[jl + klon*(jk + (klev + 1)*ibl)]; + + // Liquid , LS scheme minus detrainment + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(0 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(0 + 5*ibl))] + + pvfl[jl + klon*(jk + klev*ibl)]*ptsphy - zalfaw*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // liquid, negative numbers + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(0 + 5*ibl))]*zgdph_r; + + // liquid, vertical diffusion + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] + + pvfl[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // Rain, LS scheme + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(2 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(2 + 5*ibl))])*zgdph_r; + // rain, negative numbers + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(2 + 5*ibl))]*zgdph_r; + + // Ice , LS scheme minus detrainment + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(1 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(1 + 5*ibl))] + + pvfi[jl + klon*(jk + klev*ibl)]*ptsphy - ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // ice, negative numbers + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(1 + 5*ibl))]*zgdph_r; + + // ice, vertical diffusion + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] + + pvfi[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // snow, LS scheme + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(3 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(3 + 5*ibl))])*zgdph_r; + // snow, negative numbers + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(3 + 5*ibl))]*zgdph_r; + } + + //----------------------------------- + // enthalpy flux due to precipitation + //----------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfhpsl[jl + klon*(jk + (klev + 1)*ibl)] = -rlvtt*pfplsl[jl + klon*(jk + (klev + 1)*ibl)]; + pfhpsn[jl + klon*(jk + (klev + 1)*ibl)] = -rlstt*pfplsn[jl + klon*(jk + (klev + 1)*ibl)]; + } +} + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_c_hoist.h b/src/cloudsc_hip/cloudsc/cloudsc_c_hoist.h new file mode 100644 index 00000000..f8949673 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_c_hoist.h @@ -0,0 +1,50 @@ +#include "hip/hip_runtime.h" +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "hip/hip_runtime.h" +#include "yoecldp_c.h" +#include + +__global__ void __launch_bounds__(128, 1) cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2, + double * __restrict__ zfoealfa, double * __restrict__ ztp1, double * __restrict__ zli, + double * __restrict__ za, double * __restrict__ zaorig, double * __restrict__ zliqfrac, + double * __restrict__ zicefrac, double * __restrict__ zqx, double * __restrict__ zqx0, + double * __restrict__ zpfplsx, double * __restrict__ zlneg, double * __restrict__ zqxn2d, + double * __restrict__ zqsmix, double * __restrict__ zqsliq, double * __restrict__ zqsice, + double * __restrict__ zfoeewmt, double * __restrict__ zfoeew, double * __restrict__ zfoeeliqt); + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.cpp b/src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.cpp new file mode 100644 index 00000000..07d267cc --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.cpp @@ -0,0 +1,2630 @@ +#include "hip/hip_runtime.h" +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#include "cloudsc_c.h" +#include + +__global__ void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2) { + + //------------------------------------------------------------------------------- + // Declare input/output arguments + //------------------------------------------------------------------------------- + + // PLCRIT_AER : critical liquid mmr for rain autoconversion process + // PICRIT_AER : critical liquid mmr for snow autoconversion process + // PRE_LIQ : liq Re + // PRE_ICE : ice Re + // PCCN : liquid cloud condensation nuclei + // PNICE : ice number concentration (cf. CCN) + + const int klev = 137; // Number of levels + + double zlcond1, zlcond2, zlevapl, zlevapi, zrainaut, zsnowaut, zliqcld, zicecld; + double zlevap, zleros; + // condensation and evaporation terms + // autoconversion terms + double zfokoop; + double zfoealfa; + double zicenuclei; // number concentration of ice nuclei + + double zlicld; + double zacond; + double zaeros; + double zlfinalsum; + double zdqs; + double ztold; + double zqold; + double zdtgdp; + double zrdtgdp; + double ztrpaus; + double zcovpclr; + double zpreclr; + double zcovptot; + double zcovpmax; + double zqpretot; + double zdpevap; + double zdtforc; + double zdtdiab; + double ztp1[2]; + double zldefr; + double zldifdt; + double zdtgdpf; + double zlcust[5]; + double zacust; + double zmf; + + double zrho; + double ztmp1, ztmp2, ztmp3; + double ztmp4, ztmp5, ztmp6, ztmp7; + double zalfawm; + + // Accumulators of A,B,and C factors for cloud equations + double zsolab; // -ve implicit CC + double zsolac; // linear CC + double zanew; + double zanewm1; + + double zgdp; + + //---for flux calculation + double zda; + double zli; + double za[2]; + double zaorig; + + int llflag; + int llo1; + + int icall, ik, jk, jl, jm, jn, jo, jlen, is; + + double zdp, zpaphd; + + double zalfa; + // & ZALFACU, ZALFALS + double zalfaw; + double zbeta, zbeta1; + double zcfpr; + double zcor; + double zcdmax; + double zmin; + double zlcondlim; + double zdenom; + double zdpmxdt; + double zdpr; + double zdtdp; + double ze; + double zepsec; + double zfac, zfaci, zfacw; + double zgdcp; + double zinew; + double zlcrit; + double zmfdn; + double zprecip; + double zqe; + double zqsat, zqtmst, zrdcp; + double zrhc, zsig, zsigk; + double zwtot; + double zzco, zzdl, zzrh, zzzdt, zqadj; + double zqnew, ztnew; + double zrg_r, zgdph_r, zcons1, zcond, zcons1a; + double zlfinal; + double zmelt; + double zevap; + double zfrz; + double zvpliq, zvpice; + double zadd, zbdd, zcvds, zice0, zdepos; + double zsupsat; + double zfall; + double zre_ice; + double zrldcp; + double zqp1env; + + //---------------------------- + // Arrays for new microphysics + //---------------------------- + int iphase[5]; // marker for water phase of each species + // 0=vapour, 1=liquid, 2=ice + + int imelt[5]; // marks melting linkage for ice categories + // ice->liquid, snow->rain + + int llfall[5]; // marks falling species + // LLFALL=0, cloud cover must > 0 for zqx > 0 + // LLFALL=1, no cloud needed, zqx can evaporate + + int llindex1[5]; // index variable + int llindex3[5*5]; // index variable + double zmax; + double zrat; + int iorder[5]; // array for sorting explicit terms + + double zliqfrac; + double zicefrac; + double zqx[5]; + double zqx0[5]; + double zqxn[5]; // new values for zqx at time+1 + double zqxfg[5]; // first guess values including precip + double zqxnm1[5]; // new values for zqx at time+1 at level above + double zfluxq[5]; // fluxes convergence of species (needed?) + // Keep the following for possible future total water variance scheme? + //REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + //REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + //REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + //REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + //REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + double zlneg[5]; + double zmeltmax; + double zfrzmax; + double zicetot; + + double zqxn2d[5]; + + double zqsmix; + //REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + double zqsliq; + double zqsice; + + //REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + //REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + //REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + double zfoeewmt; + double zfoeew; + double zfoeeliqt; + + //REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + double zdqsliqdt, zdqsicedt, zdqsmixdt; + double zcorqsliq; + double zcorqsice; + //REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + double zcorqsmix; + double zevaplimliq, zevaplimice, zevaplimmix; + + //------------------------------------------------------- + // SOURCE/SINK array for implicit and explicit terms + //------------------------------------------------------- + // a POSITIVE value entered into the arrays is a... + // Source of this variable + // | + // | Sink of this variable + // | | + // V V + // ZSOLQA(JL,IQa,IQb) = explicit terms + // ZSOLQB(JL,IQa,IQb) = implicit terms + // Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + // a source of NCLDQL and a sink of IQV + // put 'magic' source terms such as PLUDE from + // detrainment into explicit source/sink array diagnognal + // ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + // i.e. A positive value is a sink!????? weird... + //------------------------------------------------------- + + double zsolqa[5*5]; // explicit sources and sinks + double zsolqb[5*5]; // implicit sources and sinks + // e.g. microphysical pathways between ice variables. + double zqlhs[5*5]; // n x n matrix storing the LHS of implicit solver + double zvqx[5]; // fall speeds of three categories + double zexplicit; + double zratio[5], zsinksum[5]; + + // for sedimentation source/sink terms + double zfallsink[5]; + double zfallsrce[5]; + + // for convection detrainment source and subsidence source/sink terms + double zconvsrce[5]; + double zconvsink[5]; + + // for supersaturation source term from previous timestep + double zpsupsatsrce[5]; + + // Numerical fit to wet bulb temperature + double ztw1 = (double) 1329.31; + double ztw2 = (double) 0.0074615; + double ztw3 = (double) 0.85E5; + double ztw4 = (double) 40.637; + double ztw5 = (double) 275.0; + + double zsubsat; // Subsaturation for snow melting term + double ztdmtw0; // Diff between dry-bulb temperature and + // temperature when wet-bulb = 0degC + + // Variables for deposition term + double ztcg; // Temperature dependent function for ice PSD + double zfacx1i, zfacx1s; // PSD correction factor + double zaplusb, zcorrfac, zcorrfac2, zpr02, zterm1, zterm2; // for ice dep + double zcldtopdist; // Distance from cloud top + double zinfactor; // No. of ice nuclei factor for deposition + + // Autoconversion/accretion/riming/evaporation + int iwarmrain; + int ievaprain; + int ievapsnow; + int idepice; + double zrainacc; + double zraincld; + double zsnowrime; + double zsnowcld; + double zesatliq; + double zfallcorr; + double zlambda; + double zevap_denom; + double zcorr2; + double zka; + double zconst; + double ztemp; + + // Rain freezing + int llrainliq; // True if majority of raindrops are liquid (no ice core) + + //---------------------------- + // End: new microphysics + //---------------------------- + + //---------------------- + // SCM budget statistics + //---------------------- + double zrain; + + double zhook_handle; + double ztmpl, ztmpi, ztmpa; + + double zmm, zrr; + double zrg; + + double zzsum, zzratio; + double zepsilon; + + double zcond1, zqp; + + double psum_solqa; + + int ibl; + int i_llfall_0; + //double zqx[5 * klev]; + //double zqx0[5 * klev]; + double zpfplsx[5 * 2]; + //double zlneg[5 * klev]; + //double zqxn2d[5 * klev]; + + jl = threadIdx.x; + ibl = blockIdx.x; + + int jk_i; + int jk_ip1; + int jk_im1; + + zepsilon = (double) 100.*DBL_EPSILON; + + // --------------------------------------------------------------------- + // Set version of warm-rain autoconversion/accretion + // IWARMRAIN = 1 ! Sundquist + // IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + // --------------------------------------------------------------------- + iwarmrain = 2; + // --------------------------------------------------------------------- + // Set version of rain evaporation + // IEVAPRAIN = 1 ! Sundquist + // IEVAPRAIN = 2 ! Abel and Boutle (2013) + // --------------------------------------------------------------------- + ievaprain = 2; + // --------------------------------------------------------------------- + // Set version of snow evaporation + // IEVAPSNOW = 1 ! Sundquist + // IEVAPSNOW = 2 ! New + // --------------------------------------------------------------------- + ievapsnow = 1; + // --------------------------------------------------------------------- + // Set version of ice deposition + // IDEPICE = 1 ! Rotstayn (2001) + // IDEPICE = 2 ! New + // --------------------------------------------------------------------- + idepice = 1; + + // --------------------- + // Some simple constants + // --------------------- + zqtmst = (double) 1.0 / ptsphy; + zgdcp = rg / rcpd; + zrdcp = rd / rcpd; + zcons1a = rcpd / (rlmlt*rg*(*yrecldp).rtaumel); + zepsec = (double) 1.E-14; + zrg_r = (double) 1.0 / rg; + zrldcp = (double) 1.0 / (ralsdcp - ralvdcp); + + // Note: Defined in module/yoecldp.F90 + // NCLDQL=1 ! liquid cloud water + // NCLDQI=2 ! ice cloud water + // NCLDQR=3 ! rain water + // NCLDQS=4 ! snow + // NCLDQV=5 ! vapour + + // ----------------------------------------------- + // Define species phase, 0=vapour, 1=liquid, 2=ice + // ----------------------------------------------- + iphase[4] = 0; + iphase[0] = 1; + iphase[2] = 1; + iphase[1] = 2; + iphase[3] = 2; + + // --------------------------------------------------- + // Set up melting/freezing index, + // if an ice category melts/freezes, where does it go? + // --------------------------------------------------- + imelt[4] = -99; + imelt[0] = 2; + imelt[2] = 4; + imelt[1] = 3; + imelt[3] = 3; + + // ----------------------------------------------- + // INITIALIZATION OF OUTPUT TENDENCIES + // ----------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_a[jl + klon*(jk + klev*ibl)] = (double) 0.0; + } + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; + } + } + + //-- These were uninitialized : meaningful only when we compare error differences + for (jk = 0; jk <= klev + -1; jk += 1) { + pcovptot[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_cld[jl + klon*(jk + klev*(5 - 1 + 5*(ibl)))] = (double) 0.0; + } + + //-------- + // Fluxes: + //-------- + pfsqlf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqif[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqrf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqsf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqlng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqnng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqrng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //rain + pfcqsng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //snow + // fluxes due to turbulence + pfsqltur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqitur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + + // ------------------------- + // set up fall speeds in m/s + // ------------------------- + zvqx[4] = (double) 0.0; + zvqx[0] = (double) 0.0; + zvqx[1] = (*yrecldp).rvice; + zvqx[2] = (*yrecldp).rvrain; + zvqx[3] = (*yrecldp).rvsnow; + for (i_llfall_0 = 0; i_llfall_0 <= 5 + -1; i_llfall_0 += 1) { + llfall[i_llfall_0] = false; + } + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (zvqx[jm] > (double) 0.0) { + llfall[jm] = true; + } + // falling species + } + // Set LLFALL to false for ice (but ice still sediments!) + // Need to rationalise this at some point + llfall[1] = false; + + prainfrac_toprfz[jl + klon*ibl] = (double) 0.0; // rain fraction at top of refreezing layer + llrainliq = true; // Assume all raindrops are liquid initially + + //###################################################################### + // 1. *** INITIAL VALUES FOR VARIABLES *** + //###################################################################### + + //----------------------------- + // Reset single level variables + //----------------------------- + + zanewm1 = (double) 0.0; + zda = (double) 0.0; + zcovpclr = (double) 0.0; + zcovpmax = (double) 0.0; + zcovptot = (double) 0.0; + zcldtopdist = (double) 0.0; + //------------- + // zero arrays + //------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[0 + 2*jm] = (double) 0.0; // precip fluxes + zpfplsx[1 + 2*jm] = (double) 0.0; + } + + // ---------------------- + // non CLV initialization + // ---------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + + // Fortran counting is beautiful! + jk_i = (jk + 1) % 2; + jk_ip1 = (jk + 2) % 2; + jk_im1 = (jk) % 2; + + if (1 <= jk + 1 && jk + 1 <= klev) { + ztp1[jk_i] = pt[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_t[jl + klon*(jk + klev*ibl)]; + zqx[4] = pq[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + zqx0[4] = pq[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + za[jk_i] = pa[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + zaorig = pa[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + + // ------------------------------------- + // initialization for CLV family + // ------------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + zqx[jm] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + ptsphy* + tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqx0[jm] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + ptsphy* + tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + } + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxn2d[jm] = (double) 0.0; // end of timestep values in 2D + zlneg[jm] = (double) 0.0; // negative input check + } + + + // ---------------------------------------------------- + // Tidy up very small cloud cover or total cloud water + // ---------------------------------------------------- + if (zqx[0] + zqx[1] < (*yrecldp).rlmin || za[jk_i] < (*yrecldp) + .ramin) { + + // Evaporate small cloud liquid water amounts + zlneg[0] = zlneg[0] + zqx[0]; + zqadj = zqx[0]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + zqx[4] = zqx[4] + zqx[0]; + zqx[0] = (double) 0.0; + + // Evaporate small cloud ice water amounts + zlneg[1] = zlneg[1] + zqx[1]; + zqadj = zqx[1]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + zqx[4] = zqx[4] + zqx[1]; + zqx[1] = (double) 0.0; + + // Set cloud cover to zero + za[jk_i] = (double) 0.0; + + } + + // --------------------------------- + // Tidy up small CLV variables + // --------------------------------- + //DIR$ IVDEP + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + if (zqx[jm] < (*yrecldp).rlmin) { + zlneg[jm] = zlneg[jm] + zqx[jm]; + zqadj = zqx[jm]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + } + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + } + zqx[4] = zqx[4] + zqx[jm]; + zqx[jm] = (double) 0.0; + } + } + + // ------------------------------ + // Define saturation values + // ------------------------------ + //---------------------------------------- + // old *diagnostic* mixed phase saturation + //---------------------------------------- + zfoealfa = ((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))); + zfoeewmt = fmin(((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))) / pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsmix = zfoeewmt; + zqsmix = zqsmix / ((double) 1.0 - retv*zqsmix); + + //--------------------------------------------- + // ice saturation T<273K + // liquid water saturation for T>273K + //--------------------------------------------- + zalfa = ((double)(fmax(0.0, copysign(1.0, ztp1[jk_i] - rtt)))); + zfoeew = fmin((zalfa*((double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)))) + + ((double) 1.0 - zalfa)*((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))) / + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zfoeew = fmin((double) 0.5, zfoeew); + zqsice = zfoeew / ((double) 1.0 - retv*zfoeew); + + //---------------------------------- + // liquid water saturation + //---------------------------------- + zfoeeliqt = fmin(((double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)))) / + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsliq = zfoeeliqt; + zqsliq = zqsliq / ((double) 1.0 - retv*zqsliq); + + // !---------------------------------- + // ! ice water saturation + // !---------------------------------- + // ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + // ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + // ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-retv*ZQSICE(JL,JK)) + + + //------------------------------------------ + // Ensure cloud fraction is between 0 and 1 + //------------------------------------------ + za[jk_i] = fmax((double) 0.0, fmin((double) 1.0, za[jk_i])); + + //------------------------------------------------------------------- + // Calculate liq/ice fractions (no longer a diagnostic relationship) + //------------------------------------------------------------------- + zli = zqx[0] + zqx[1]; + if (zli > (*yrecldp).rlmin) { + zliqfrac = zqx[0] / zli; + zicefrac = (double) 1.0 - zliqfrac; + } else { + zliqfrac = (double) 0.0; + zicefrac = (double) 0.0; + } + + //###################################################################### + // 2. *** CONSTANTS AND PARAMETERS *** + //###################################################################### + // Calculate L in updrafts of bl-clouds + // Specify QS, P/PS for tropopause (for c2) + // And initialize variables + //------------------------------------------ + + //--------------------------------- + // Find tropopause level (ZTRPAUS) + //--------------------------------- + //ZTRPAUS = 0.1_JPRB + //ZPAPHD = 1.0_JPRB / PAPH(JL, KLEV + 1, IBL) + //DO JK=1,KLEV - 1 + // ZSIG = PAP(JL, JK, IBL)*ZPAPHD + // IF (ZSIG > 0.1_JPRB .and. ZSIG < 0.4_JPRB .and. ZTP1(JK_I) > ZTP1(JL, JK + 1, IBL)) THEN + // ZTRPAUS = ZSIG + // END IF + //END DO + + //----------------------------- + // Reset single level variables + //----------------------------- + + //ZANEWM1 = 0.0_JPRB + //ZDA = 0.0_JPRB + //ZCOVPCLR = 0.0_JPRB + //ZCOVPMAX = 0.0_JPRB + //ZCOVPTOT = 0.0_JPRB + //ZCLDTOPDIST = 0.0_JPRB + + //###################################################################### + // 3. *** PHYSICS *** + //###################################################################### + + + //---------------------------------------------------------------------- + // START OF VERTICAL LOOP + //---------------------------------------------------------------------- + + if ((*yrecldp).ncldtop <= jk + 1 && jk + 1 <= klev) { + + //---------------------------------------------------------------------- + // 3.0 INITIALIZE VARIABLES + //---------------------------------------------------------------------- + + //--------------------------------- + // First guess microphysics + //--------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxfg[jm] = zqx[jm]; + } + + //--------------------------------- + // Set KLON arrays to zero + //--------------------------------- + + zlicld = (double) 0.0; + zrainaut = (double) 0.0; // currently needed for diags + zrainacc = (double) 0.0; // currently needed for diags + zsnowaut = (double) 0.0; // needed + zldefr = (double) 0.0; + zacust = (double) 0.0; // set later when needed + zqpretot = (double) 0.0; + zlfinalsum = (double) 0.0; + + // Required for first guess call + zlcond1 = (double) 0.0; + zlcond2 = (double) 0.0; + zsupsat = (double) 0.0; + zlevapl = (double) 0.0; + zlevapi = (double) 0.0; + + //------------------------------------- + // solvers for cloud fraction + //------------------------------------- + zsolab = (double) 0.0; + zsolac = (double) 0.0; + + zicetot = (double) 0.0; + + //------------------------------------------ + // reset matrix so missing pathways are set + //------------------------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsolqb[jn + 5*jm] = (double) 0.0; + zsolqa[jn + 5*jm] = (double) 0.0; + } + } + + //---------------------------------- + // reset new microphysics variables + //---------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zfallsrce[jm] = (double) 0.0; + zfallsink[jm] = (double) 0.0; + zconvsrce[jm] = (double) 0.0; + zconvsink[jm] = (double) 0.0; + zpsupsatsrce[jm] = (double) 0.0; + zratio[jm] = (double) 0.0; + } + + + //------------------------- + // derived variables needed + //------------------------- + + zdp = paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - paph[jl + klon*(jk + (klev + + 1)*ibl)]; // dp + zgdp = rg / zdp; // g/dp + zrho = pap[jl + klon*(jk + klev*ibl)] / (rd*ztp1[jk_i]); // p/RT air density + + zdtgdp = ptsphy*zgdp; // dt g/dp + zrdtgdp = zdp*((double) 1.0 / (ptsphy*rg)); // 1/(dt g/dp) + + if (jk + 1 > 1) { + zdtgdpf = ptsphy*rg / (pap[jl + klon*(jk + klev*ibl)] - pap[jl + klon*(-1 + + jk + klev*ibl)]); + } + + //------------------------------------ + // Calculate dqs/dT correction factor + //------------------------------------ + // Reminder: retv=rv/rd-1 + + // liquid + zfacw = r5les / (pow((ztp1[jk_i] - r4les), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeeliqt); + zdqsliqdt = zfacw*zcor*zqsliq; + zcorqsliq = (double) 1.0 + ralvdcp*zdqsliqdt; + + // ice + zfaci = r5ies / (pow((ztp1[jk_i] - r4ies), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeew); + zdqsicedt = zfaci*zcor*zqsice; + zcorqsice = (double) 1.0 + ralsdcp*zdqsicedt; + + // diagnostic mixed + zalfaw = zfoealfa; + zalfawm = zalfaw; + zfac = zalfaw*zfacw + ((double) 1.0 - zalfaw)*zfaci; + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeewmt); + zdqsmixdt = zfac*zcor*zqsmix; + zcorqsmix = (double) 1.0 + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zdqsmixdt; + + // evaporation/sublimation limits + zevaplimmix = fmax((zqsmix - zqx[4]) / zcorqsmix, (double) 0.0); + zevaplimliq = fmax((zqsliq - zqx[4]) / zcorqsliq, (double) 0.0); + zevaplimice = fmax((zqsice - zqx[4]) / zcorqsice, (double) 0.0); + + //-------------------------------- + // in-cloud consensate amount + //-------------------------------- + ztmpa = (double) 1.0 / fmax(za[jk_i], zepsec); + zliqcld = zqx[0]*ztmpa; + zicecld = zqx[1]*ztmpa; + zlicld = zliqcld + zicecld; + + + //------------------------------------------------ + // Evaporate very small amounts of liquid and ice + //------------------------------------------------ + + if (zqx[0] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(0)] = zqx[0]; + zsolqa[0 + 5*(4)] = -zqx[0]; + } + + if (zqx[1] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(1)] = zqx[1]; + zsolqa[1 + 5*(4)] = -zqx[1]; + } + + + //--------------------------------------------------------------------- + // 3.1 ICE SUPERSATURATION ADJUSTMENT + //--------------------------------------------------------------------- + // Note that the supersaturation adjustment is made with respect to + // liquid saturation: when T>0C + // ice saturation: when T<0C + // with an adjustment made to allow for ice + // supersaturation in the clear sky + // Note also that the KOOP factor automatically clips the supersaturation + // to a maximum set by the liquid water saturation mixing ratio + // important for temperatures near to but below 0C + //----------------------------------------------------------------------- + + //DIR$ NOFUSION + + //----------------------------------- + // 3.1.1 Supersaturation limit (from Koop) + //----------------------------------- + // Needs to be set for all temperatures + zfokoop = ((double)(fmin(rkoop1 - rkoop2*ztp1[jk_i], (double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/ + (ztp1[jk_i] - r4les)))*1.0/(double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies)))))); + + if (ztp1[jk_i] >= rtt || (*yrecldp).nssopt == 0) { + zfac = (double) 1.0; + zfaci = (double) 1.0; + } else { + zfac = za[jk_i] + zfokoop*((double) 1.0 - za[jk_i]); + zfaci = ptsphy / (*yrecldp).rkooptau; + } + + //------------------------------------------------------------------- + // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + // correction factor + // [#Note: QSICE or QSLIQ] + //------------------------------------------------------------------- + + // Calculate supersaturation to add to cloud + if (za[jk_i] > (double) 1.0 - (*yrecldp).ramin) { + zsupsat = fmax((zqx[4] - zfac*zqsice) / zcorqsice, (double) 0.0); + } else { + // Calculate environmental humidity supersaturation + zqp1env = (zqx[4] - za[jk_i]*zqsice) / fmax((double) 1.0 - za[jk_i], zepsilon); + //& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat = fmax(((double) 1.0 - za[jk_i])*(zqp1env - zfac*zqsice) / + zcorqsice, (double) 0.0); + } + + //------------------------------------------------------------------- + // Here the supersaturation is turned into liquid water + // However, if the temperature is below the threshold for homogeneous + // freezing then the supersaturation is turned instantly to ice. + //-------------------------------------------------------------------- + + if (zsupsat > zepsec) { + + if (ztp1[jk_i] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zsupsat; + // Include liquid in first guess + zqxfg[0] = zqxfg[0] + zsupsat; + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zsupsat; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + zsupsat; + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk_i])*zfaci; + + } + + //------------------------------------------------------- + // 3.1.3 Include supersaturation from previous timestep + // (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + //------------------------------------------------------- + if (psupsat[jl + klon*(jk + klev*ibl)] > zepsec) { + if (ztp1[jk_i] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(0)] = + zsolqa[0 + 5*(0)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[0] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add liquid to first guess for deposition term + zqxfg[0] = zqxfg[0] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(1)] = + zsolqa[1 + 5*(1)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[1] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk_i])*zfaci; + // Store cloud budget diagnostics if required + } + + // on JL + + //--------------------------------------------------------------------- + // 3.2 DETRAINMENT FROM CONVECTION + //--------------------------------------------------------------------- + // * Diagnostic T-ice/liq split retained for convection + // Note: This link is now flexible and a future convection + // scheme can detrain explicit seperate budgets of: + // cloud water, ice, rain and snow + // * There is no (1-ZA) multiplier term on the cloud detrainment + // term, since is now written in mass-flux terms + // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + //--------------------------------------------------------------------- + if (jk + 1 < klev && jk + 1 >= (*yrecldp).ncldtop) { + + + plude[jl + klon*(jk + klev*ibl)] = plude[jl + klon*(jk + klev*ibl)]*zdtgdp; + + if (/*ldcum[jl + klon*ibl] &&*/ plude[jl + klon*(jk + klev*ibl)] > (*yrecldp) + .rlmin && plu[jl + klon*(1 + jk + klev*ibl)] > zepsec) { + + zsolac = zsolac + plude[jl + klon*(jk + klev*ibl)] / plu[jl + klon*(1 + jk + + klev*ibl)]; + // *diagnostic temperature split* + zalfaw = zfoealfa; + zconvsrce[0] = zalfaw*plude[jl + klon*(jk + klev*ibl)]; + zconvsrce[1] = + ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)]; + zsolqa[0 + 5*(0)] = + zsolqa[0 + 5*(0)] + zconvsrce[0]; + zsolqa[1 + 5*(1)] = + zsolqa[1 + 5*(1)] + zconvsrce[1]; + + } else { + + plude[jl + klon*(jk + klev*ibl)] = (double) 0.0; + + } + // *convective snow detrainment source + //if (ldcum[jl + klon*ibl]) { + zsolqa[3 + 5*(3)] = + zsolqa[3 + 5*(3)] + psnde[jl + klon*(jk + klev*ibl)]*zdtgdp; + //} + + + } + // JK (*yrecldp).ncldtop) { + + zmf = fmax((double) 0.0, (pmfu[jl + klon*(jk + klev*ibl)] + pmfd[jl + + klon*(jk + klev*ibl)])*zdtgdp); + zacust = zmf*zanewm1; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlcust[jm] = zmf*zqxnm1[jm]; + // record total flux for enthalpy budget: + zconvsrce[jm] = zconvsrce[jm] + zlcust[jm]; + } + } + + // Now have to work out how much liquid evaporates at arrival point + // since there is no prognostic memory for in-cloud humidity, i.e. + // we always assume cloud is saturated. + + zdtdp = zrdcp*(double) 0.5*(ztp1[jk_im1] + ztp1[jk_i]) / paph[jl + + klon*(jk + (klev + 1)*ibl)]; + zdtforc = zdtdp*(pap[jl + klon*(jk + klev*ibl)] - pap[jl + klon*(-1 + jk + + klev*ibl)]); + //[#Note: Diagnostic mixed phase should be replaced below] + zdqs = zanewm1*zdtforc*zdqsmixdt; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlfinal = fmax((double) 0.0, zlcust[jm] - zdqs); //lim to zero + // no supersaturation allowed incloud ---V + zevap = fmin((zlcust[jm] - zlfinal), zevaplimmix); + // ZEVAP=0.0_JPRB + zlfinal = zlcust[jm] - zevap; + zlfinalsum = zlfinalsum + zlfinal; // sum + + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zlcust[jm]; // whole sum + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zevap; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zevap; + } + } + + // Reset the cloud contribution if no cloud water survives to this level: + if (zlfinalsum < zepsec) { + zacust = (double) 0.0; + } + zsolac = zsolac + zacust; + + } + // on JK>NCLDTOP + + //--------------------------------------------------------------------- + // Subsidence sink of cloud to the layer below + // (Implicit - re. CFL limit on convective mass flux) + //--------------------------------------------------------------------- + + + if (jk + 1 < klev) { + + zmfdn = fmax((double) 0.0, (pmfu[jl + klon*(1 + jk + klev*ibl)] + pmfd[jl + + klon*(1 + jk + klev*ibl)])*zdtgdp); + + zsolab = zsolab + zmfdn; + zsolqb[0 + 5*(0)] = zsolqb[0 + 5*(0)] + zmfdn; + zsolqb[1 + 5*(1)] = zsolqb[1 + 5*(1)] + zmfdn; + + // Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[0] = zmfdn; + zconvsink[1] = zmfdn; + + } + + + //---------------------------------------------------------------------- + // 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + //---------------------------------------------------------------------- + // NOTE: In default tiedtke scheme this process decreases the cloud + // area but leaves the specific cloud water content + // within clouds unchanged + //---------------------------------------------------------------------- + + // ------------------------------ + // Define turbulent erosion rate + // ------------------------------ + zldifdt = (*yrecldp).rcldiff*ptsphy; //original version + //Increase by factor of 5 for convective points + if (ktype[jl + klon*ibl] > 0 && plude[jl + klon*(jk + klev*ibl)] > zepsec) { + zldifdt = (*yrecldp).rcldiff_convi*zldifdt; + } + + // At the moment, works on mixed RH profile and partitioned ice/liq fraction + // so that it is similar to previous scheme + // Should apply RHw for liquid cloud and RHi for ice cloud separately + if (zli > zepsec) { + // Calculate environmental humidity + // ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + // & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + // ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt*fmax(zqsmix - zqx[4], (double) 0.0); + zleros = za[jk_i]*ze; + zleros = fmin(zleros, zevaplimmix); + zleros = fmin(zleros, zli); + zaeros = zleros / zlicld; //if linear term + + // Erosion is -ve LINEAR in L,A + zsolac = zsolac - zaeros; //linear + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac*zleros; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac*zleros; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac*zleros; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac*zleros; + + } + + //---------------------------------------------------------------------- + // 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + //---------------------------------------------------------------------- + // calculate dqs/dt + // Note: For the separate prognostic Qi and Ql, one would ideally use + // Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + // forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + // These would then instantaneous freeze if T<-38C or lead to ice growth + // by deposition in warmer mixed phase clouds. However, since we do + // not have a separate prognostic equation for in-cloud humidity or a + // statistical scheme approach in place, the depositional growth of ice + // in the mixed phase can not be modelled and we resort to supersaturation + // wrt ice instanteously converting to ice over one timestep + // (see Tompkins et al. QJRMS 2007 for details) + // Thus for the initial implementation the diagnostic mixed phase is + // retained for the moment, and the level of approximation noted. + //---------------------------------------------------------------------- + + zdtdp = zrdcp*ztp1[jk_i] / pap[jl + klon*(jk + klev*ibl)]; + zdpmxdt = zdp*zqtmst; + zmfdn = (double) 0.0; + if (jk + 1 < klev) { + zmfdn = + pmfu[jl + klon*(1 + jk + klev*ibl)] + pmfd[jl + klon*(1 + jk + klev*ibl)]; + } + zwtot = pvervel[jl + klon*(jk + klev*ibl)] + (double) 0.5*rg*(pmfu[jl + + klon*(jk + klev*ibl)] + pmfd[jl + klon*(jk + klev*ibl)] + zmfdn); + zwtot = fmin(zdpmxdt, fmax(-zdpmxdt, zwtot)); + zzzdt = phrsw[jl + klon*(jk + klev*ibl)] + phrlw[jl + klon*(jk + klev*ibl)]; + zdtdiab = + fmin(zdpmxdt*zdtdp, fmax(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ralfdcp*zldefr; + // Note: ZLDEFR should be set to the difference between the mixed phase functions + // in the convection and cloud scheme, but this is not calculated, so is zero and + // the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab; + zqold = zqsmix; + ztold = ztp1[jk_i]; + ztp1[jk_i] = ztp1[jk_i] + zdtforc; + ztp1[jk_i] = fmax(ztp1[jk_i], (double) 160.0); + llflag = true; + + // Formerly a call to CUADJTQ(..., ICALL=5) + zqp = (double) 1.0 / pap[jl + klon*(jk + klev*ibl)]; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond = (zqsmix - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jk_i] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jk_i] - r4ies, 2))))); + ztp1[jk_i] = ztp1[jk_i] + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond; + zqsmix = zqsmix - zcond; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond1 = (zqsmix - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jk_i] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jk_i] - r4ies, 2))))); + ztp1[jk_i] = ztp1[jk_i] + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond1; + zqsmix = zqsmix - zcond1; + + zdqs = zqsmix - zqold; + zqsmix = zqold; + ztp1[jk_i] = ztold; + + //---------------------------------------------------------------------- + // 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + // ---------------------------------------------------------------------- + // Erosion term is LINEAR in L + // Changed to be uniform distribution in cloud region + + + // Previous function based on DELTA DISTRIBUTION in cloud: + if (zdqs > (double) 0.0) { + // If subsidence evaporation term is turned off, then need to use updated + // liquid and cloud here? + // ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jk_i]*fmin(zdqs, zlicld); + zlevap = fmin(zlevap, zevaplimmix); + zlevap = fmin(zlevap, fmax(zqsmix - zqx[4], (double) 0.0)); + + // For first guess call + zlevapl = zliqfrac*zlevap; + zlevapi = zicefrac*zlevap; + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac*zlevap; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac*zlevap; + + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac*zlevap; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac*zlevap; + + } + + + //---------------------------------------------------------------------- + // 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + //---------------------------------------------------------------------- + // (1) Increase of cloud water in existing clouds + if (za[jk_i] > zepsec && zdqs <= -(*yrecldp).rlmin) { + + zlcond1 = fmax(-zdqs, (double) 0.0); //new limiter + + //old limiter (significantly improves upper tropospheric humidity rms) + if (za[jk_i] > (double) 0.99) { + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsmix); + zcdmax = (zqx[4] - zqsmix) / ((double) 1.0 + zcor*zqsmix*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jk_i] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jk_i] - r4ies, 2))))); + } else { + zcdmax = (zqx[4] - za[jk_i]*zqsmix) / za[jk_i]; + } + zlcond1 = fmax(fmin(zlcond1, zcdmax), (double) 0.0); + // end old limiter + + zlcond1 = za[jk_i]*zlcond1; + if (zlcond1 < (*yrecldp).rlmin) { + zlcond1 = (double) 0.0; + } + + //------------------------------------------------------------------------- + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------- + if (ztp1[jk_i] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond1; + zqxfg[0] = zqxfg[0] + zlcond1; + } else { + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond1; + zqxfg[1] = zqxfg[1] + zlcond1; + } + } + + // (2) Generation of new clouds (da/dt>0) + + + if (zdqs <= -(*yrecldp).rlmin && za[jk_i] < (double) 1.0 - zepsec) { + + //--------------------------- + // Critical relative humidity + //--------------------------- + zrhc = (*yrecldp).ramid; + zsigk = + pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]; + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + if (zsigk > (double) 0.8) { + zrhc = (*yrecldp).ramid + ((double) 1.0 - (*yrecldp).ramid)*(pow(((zsigk - + (double) 0.8) / (double) 0.2), 2)); + } + + // Commented out for CY37R1 to reduce humidity in high trop and strat + // ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + // ZBOTT=ZTRPAUS(JL)+0.2_JPRB + // IF(ZSIGK < ZBOTT) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + // ENDIF + + //--------------------------- + // Supersaturation options + //--------------------------- + if ((*yrecldp).nssopt == 0) { + // No scheme + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 1) { + // Tompkins + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 2) { + // Lohmann and Karcher + zqe = zqx[4]; + } else if ((*yrecldp).nssopt == 3) { + // Gierens + zqe = zqx[4] + zli; + } + + if (ztp1[jk_i] >= rtt || (*yrecldp).nssopt == 0) { + // No ice supersaturation allowed + zfac = (double) 1.0; + } else { + // Ice supersaturation + zfac = zfokoop; + } + + if (zqe >= zrhc*zqsice*zfac && zqe < zqsice*zfac) { + // note: not **2 on 1-a term if ZQE is used. + // Added correction term ZFAC to numerator 15/03/2010 + zacond = -((double) 1.0 - za[jk_i])*zfac*zdqs / fmax((double) + 2.0*(zfac*zqsice - zqe), zepsec); + + zacond = fmin(zacond, (double) 1.0 - za[jk_i]); //PUT THE LIMITER BACK + + // Linear term: + // Added correction term ZFAC 15/03/2010 + zlcond2 = -zfac*zdqs*(double) 0.5*zacond; //mine linear + + // new limiter formulation + zzdl = (double) 2.0*(zfac*zqsice - zqe) / fmax(zepsec, (double) 1.0 - za[jk_i]); + // Added correction term ZFAC 15/03/2010 + if (zfac*zdqs < -zzdl) { + // ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = + (za[jk_i] - (double) 1.0)*zfac*zdqs - zfac*zqsice + zqx[4]; + zlcond2 = fmin(zlcond2, zlcondlim); + } + zlcond2 = fmax(zlcond2, (double) 0.0); + + if (zlcond2 < (*yrecldp).rlmin || ((double) 1.0 - za[jk_i]) < zepsec + ) { + zlcond2 = (double) 0.0; + zacond = (double) 0.0; + } + if (zlcond2 == (double) 0.0) { + zacond = (double) 0.0; + } + + // Large-scale generation is LINEAR in A and LINEAR in L + zsolac = zsolac + zacond; //linear + + //------------------------------------------------------------------------ + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------ + if (ztp1[jk_i] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond2; + zqxfg[0] = zqxfg[0] + zlcond2; + } else { + // homogeneous freezing + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond2; + zqxfg[1] = zqxfg[1] + zlcond2; + } + + } + } + + //---------------------------------------------------------------------- + // 3.7 Growth of ice by vapour deposition + //---------------------------------------------------------------------- + // Following Rotstayn et al. 2001: + // does not use the ice nuclei number from cloudaer.F90 + // but rather a simple Meyers et al. 1992 form based on the + // supersaturation and assuming clouds are saturated with + // respect to liquid water (well mixed), (or Koop adjustment) + // Growth considered as sink of liquid water if present so + // Bergeron-Findeisen adjustment in autoconversion term no longer needed + //---------------------------------------------------------------------- + + //-------------------------------------------------------- + //- + //- Ice deposition following Rotstayn et al. (2001) + //- (monodisperse ice particle size distribution) + //- + //-------------------------------------------------------- + if (idepice == 1) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*rg) + //-------------------------------------------------------------- + + if (za[jk_im1] < (*yrecldp).rcldtopcf && za[jk_i] >= (*yrecldp) + .rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk_i] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = (((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))*rv) / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq + - (double) 0.639); + + //------------------------------------------------ + // 2.4e-2 is conductivity of air + // 8.8 = 700**1/3 = density of ice to the third + //------------------------------------------------ + zadd = rlstt*(rlstt / (rv*ztp1[jk_i]) - (double) 1.0) / ((double) + 2.4E-2*ztp1[jk_i]); + zbdd = rv*ztp1[jk_i]*pap[jl + klon*(jk + klev*ibl)] / ((double) + 2.21*zvpice); + zcvds = (double) 7.8*(pow((zicenuclei / zrho), (double) 0.666))*(zvpliq - + zvpice) / ((double) 8.87*(zadd + zbdd)*zvpice); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + //------------------ + // new value of ice: + //------------------ + zinew = pow(((double) 0.666*zcvds*ptsphy + (pow(zice0, (double) 0.666))), + (double) 1.5); + + //--------------------------- + // grid-mean deposition rate: + //--------------------------- + zdepos = fmax(za[jk_i]*(zinew - zice0), (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0 + ); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + + } + + //-------------------------------------------------------- + //- + //- Ice deposition assuming ice PSD + //- + //-------------------------------------------------------- + } else if (idepice == 2) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*rg) + //-------------------------------------------------------------- + + if (za[jk_im1] < (*yrecldp).rcldtopcf && za[jk_i] >= (*yrecldp) + .rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk_i] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = (((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))*rv) / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq + - (double) 0.639); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + // Particle size distribution + ztcg = (double) 1.0; + zfacx1i = (double) 1.0; + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk_i] + + pap[jl + klon*(jk + klev*ibl)]*(*yrecldp).rcl_apb3*(pow(ztp1[jk_i], (double) 3.)); + zcorrfac = pow(((double) 1.0 / zrho), (double) 0.5); + zcorrfac2 = (pow((ztp1[jk_i] / (double) 273.0), (double) 1.5)) + *((double) 393.0 / (ztp1[jk_i] + (double) 120.0)); + + zpr02 = zrho*zice0*(*yrecldp).rcl_const1i / (ztcg*zfacx1i); + + zterm1 = (zvpliq - zvpice)*(pow(ztp1[jk_i], (double) 2.0)) + *zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2i*zfacx1i / + (zrho*zaplusb*zvpice); + zterm2 = (double) 0.65*(*yrecldp).rcl_const6i*(pow(zpr02, (*yrecldp) + .rcl_const4i)) + (*yrecldp).rcl_const3i*(pow(zcorrfac, (double) 0.5)) + *(pow(zrho, (double) 0.5))*(pow(zpr02, (*yrecldp).rcl_const5i)) / + (pow(zcorrfac2, (double) 0.5)); + + zdepos = fmax(za[jk_i]*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0 + ); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + } + + } + // on IDEPICE + + //###################################################################### + // 4 *** PRECIPITATION PROCESSES *** + //###################################################################### + + //---------------------------------- + // revise in-cloud consensate amount + //---------------------------------- + ztmpa = (double) 1.0 / fmax(za[jk_i], zepsec); + zliqcld = zqxfg[0]*ztmpa; + zicecld = zqxfg[1]*ztmpa; + zlicld = zliqcld + zicecld; + + //---------------------------------------------------------------------- + // 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + // now that rain, snow, graupel species are prognostic + // the precipitation flux can be defined directly level by level + // There is no vertical memory required from the flux variable + //---------------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm] || jm + 1 == 2) { + //------------------------ + // source from layer above + //------------------------ + if (jk + 1 > (*yrecldp).ncldtop) { + zfallsrce[jm] = zpfplsx[jk_i + 2*jm]*zdtgdp; + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zfallsrce[jm]; + zqxfg[jm] = zqxfg[jm] + zfallsrce[jm]; + // use first guess precip----------V + zqpretot = zqpretot + zqxfg[jm]; + } + //------------------------------------------------- + // sink to next layer, constant fall speed + //------------------------------------------------- + // if aerosol effect then override + // note that for T>233K this is the same as above. + if ((*yrecldp).laericesed && jm + 1 == 2) { + zre_ice = pre_ice[jl + klon*(jk + klev*ibl)]; + // The exponent value is from + // Morrison et al. JAS 2005 Appendix + zvqx[1] = (double) 0.002*(pow(zre_ice, (double) 1.0)); + } + zfall = zvqx[jm]*zrho; + //------------------------------------------------- + // modified by Heymsfield and Iaquinta JAS 2000 + //------------------------------------------------- + // ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + // &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm] = zdtgdp*zfall; + // Cloud budget diagnostic stored at end as implicit + // jl + } + // LLFALL + } + // jm + + //--------------------------------------------------------------- + // Precip cover overlap using MAX-RAN Overlap + // Since precipitation is now prognostic we must + // 1) apply an arbitrary minimum coverage (0.3) if precip>0 + // 2) abandon the 2-flux clr/cld treatment + // 3) Thus, since we have no memory of the clear sky precip + // fraction, we mimic the previous method by reducing + // ZCOVPTOT(JL), which has the memory, proportionally with + // the precip evaporation rate, taking cloud fraction + // into account + // #3 above leads to much smoother vertical profiles of + // precipitation fraction than the Klein-Jakob scheme which + // monotonically increases precip fraction and then resets + // it to zero in a step function once clear-sky precip reaches + // zero. + //--------------------------------------------------------------- + if (zqpretot > zepsec) { + zcovptot = (double) 1.0 - (((double) 1.0 - zcovptot)*((double) 1.0 - + fmax(za[jk_i], za[jk_im1]))) / ((double) 1.0 - fmin(za[jk_im1], (double) 1.0 - (double) 1.E-06)); // here!!! + zcovptot = fmax(zcovptot, (*yrecldp).rcovpmin); + zcovpclr = fmax((double) 0.0, zcovptot - za[jk_i]); // clear sky proportion + zraincld = zqxfg[2] / zcovptot; + zsnowcld = zqxfg[3] / zcovptot; + zcovpmax = fmax(zcovptot, zcovpmax); + } else { + zraincld = (double) 0.0; + zsnowcld = (double) 0.0; + zcovptot = (double) 0.0; // no flux - reset cover + zcovpclr = (double) 0.0; // reset clear sky proportion + zcovpmax = (double) 0.0; // reset max cover for ZZRH calc + } + + //---------------------------------------------------------------------- + // 4.3a AUTOCONVERSION TO SNOW + //---------------------------------------------------------------------- + + if (ztp1[jk_i] <= rtt) { + //----------------------------------------------------- + // Snow Autoconversion rate follow Lin et al. 1983 + //----------------------------------------------------- + if (zicecld > zepsec) { + + zzco = ptsphy*(*yrecldp).rsnowlin1*exp((*yrecldp).rsnowlin2*(ztp1[jk_i] - rtt)); + + if ((*yrecldp).laericeauto) { + zlcrit = picrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=0.027 + zzco = zzco*(pow(((*yrecldp).rnice / pnice[jl + klon*(jk + klev*ibl)]), + (double) 0.333)); + } else { + zlcrit = (*yrecldp).rlcritsnow; + } + + zsnowaut = zzco*((double) 1.0 - exp(-(pow((zicecld / zlcrit), 2)))); + zsolqb[3 + 5*(1)] = zsolqb[3 + 5*(1)] + zsnowaut; + + } + } + + //---------------------------------------------------------------------- + // 4.3b AUTOCONVERSION WARM CLOUDS + // Collection and accretion will require separate treatment + // but for now we keep this simple treatment + //---------------------------------------------------------------------- + + if (zliqcld > zepsec) { + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Sundqvist (1989) + //- + //-------------------------------------------------------- + if (iwarmrain == 1) { + + zzco = (*yrecldp).rkconv*ptsphy; + + if ((*yrecldp).laerliqautolsp) { + zlcrit = plcrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), + (double) 0.333)); + } else { + // Modify autoconversion threshold dependent on: + // land (polluted, high CCN, smaller droplets, higher threshold) + // sea (clean, low CCN, larger droplets, lower threshold) + if (plsm[jl + klon*ibl] > (double) 0.5) { + zlcrit = (*yrecldp).rclcrit_land; // land + } else { + zlcrit = (*yrecldp).rclcrit_sea; // ocean + } + } + + //------------------------------------------------------------------ + // Parameters for cloud collection by rain and snow. + // Note that with new prognostic variable it is now possible + // to REPLACE this with an explicit collection parametrization + //------------------------------------------------------------------ + zprecip = (zpfplsx[jk_i + 2*(3)] + zpfplsx[jk_i + 2*(2) + ]) / fmax(zepsec, zcovptot); + zcfpr = (double) 1.0 + (*yrecldp).rprc1*sqrt(fmax(zprecip, (double) 0.0)); + // ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + // &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if ((*yrecldp).laerliqcoll) { + // 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), + (double) 0.333)); + } + + zzco = zzco*zcfpr; + zlcrit = zlcrit / fmax(zcfpr, zepsec); + + if (zliqcld / zlcrit < (double) 20.0) { + // Security for exp for some compilers + zrainaut = zzco*((double) 1.0 - exp(-(pow((zliqcld / zlcrit), 2)))); + } else { + zrainaut = zzco; + } + + // rain freezes instantly + if (ztp1[jk_i] <= rtt) { + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zrainaut; + } else { + zsolqb[2 + 5*(0)] = zsolqb[2 + 5*(0)] + zrainaut; + } + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Khairoutdinov and Kogan (2000) + //- + //-------------------------------------------------------- + } else if (iwarmrain == 2) { + + if (plsm[jl + klon*ibl] > (double) 0.5) { + // land + zconst = (*yrecldp).rcl_kk_cloud_num_land; + zlcrit = (*yrecldp).rclcrit_land; + } else { + // ocean + zconst = (*yrecldp).rcl_kk_cloud_num_sea; + zlcrit = (*yrecldp).rclcrit_sea; + } + + if (zliqcld > zlcrit) { + + zrainaut = (double) 1.5*za[jk_i]*ptsphy*(*yrecldp) + .rcl_kkaau*(pow(zliqcld, (*yrecldp).rcl_kkbauq))*(pow(zconst, (*yrecldp + ).rcl_kkbaun)); + + zrainaut = fmin(zrainaut, zqxfg[0]); + if (zrainaut < zepsec) { + zrainaut = (double) 0.0; + } + + zrainacc = (double) 2.0*za[jk_i]*ptsphy*(*yrecldp) + .rcl_kkaac*(pow((zliqcld*zraincld), (*yrecldp).rcl_kkbac)); + + zrainacc = fmin(zrainacc, zqxfg[0]); + if (zrainacc < zepsec) { + zrainacc = (double) 0.0; + } + + } else { + zrainaut = (double) 0.0; + zrainacc = (double) 0.0; + } + + // If temperature < 0, then autoconversion produces snow rather than rain + // Explicit + if (ztp1[jk_i] <= rtt) { + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainaut; + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainaut; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainacc; + } else { + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainaut; + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainaut; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainacc; + } + + } + // on IWARMRAIN + + } + // on ZLIQCLD > ZEPSEC + + + //---------------------------------------------------------------------- + // RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + // only active if T<0degC and supercooled liquid water is present + // AND if not Sundquist autoconversion (as this includes riming) + //---------------------------------------------------------------------- + if (iwarmrain > 1) { + + if (ztp1[jk_i] <= rtt && zliqcld > zepsec) { + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), (double) 0.4); + + //------------------------------------------------------------------ + // Riming of snow by cloud water - implicit in lwc + //------------------------------------------------------------------ + if (zsnowcld > zepsec && zcovptot > (double) 0.01) { + + // Calculate riming term + // Factor of liq water taken out because implicit + zsnowrime = (double) 0.3*zcovptot*ptsphy*(*yrecldp) + .rcl_const7s*zfallcorr*(pow((zrho*zsnowcld*(*yrecldp).rcl_const1s), + (*yrecldp).rcl_const8s)); + + // Limit snow riming term + zsnowrime = fmin(zsnowrime, (double) 1.0); + + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zsnowrime; + + } + + //------------------------------------------------------------------ + // Riming of ice by cloud water - implicit in lwc + // NOT YET ACTIVE + //------------------------------------------------------------------ + // IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + // + // ! Calculate riming term + // ! Factor of liq water taken out because implicit + // ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + // & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + // + // ! Limit ice riming term + // ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + // + // ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + // + // ENDIF + } + + } + // on IWARMRAIN > 1 + + + //---------------------------------------------------------------------- + // 4.4a MELTING OF SNOW and ICE + // with new implicit solver this also has to treat snow or ice + // precipitating from the level above... i.e. local ice AND flux. + // in situ ice and snow: could arise from LS advection or warming + // falling ice and snow: arrives by precipitation process + //---------------------------------------------------------------------- + + zicetot = zqxfg[1] + zqxfg[3]; + zmeltmax = (double) 0.0; + + // If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if (zicetot > zepsec && ztp1[jk_i] > rtt) { + + // Calculate subsaturation + zsubsat = fmax(zqsice - zqx[4], (double) 0.0); + + // Calculate difference between dry-bulb (ZTP1) and the temperature + // at which the wet-bulb=0degC (rtt-ZSUBSAT*....) using an approx. + // Melting only occurs if the wet-bulb temperature >0 + // i.e. warming of ice particle due to melting > cooling + // due to evaporation. + ztdmtw0 = ztp1[jk_i] - rtt - zsubsat*(ztw1 + ztw2*(pap[jl + klon*(jk + + klev*ibl)] - ztw3) - ztw4*(ztp1[jk_i] - ztw5)); + // Not implicit yet... + // Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = + fabs(ptsphy*((double) 1.0 + (double) 0.5*ztdmtw0) / (*yrecldp).rtaumel); + zmeltmax = fmax(ztdmtw0*zcons1*zrldcp, (double) 0.0); + } + + // Loop over frozen hydrometeors (ice, snow) + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (iphase[jm] == 2) { + jn = imelt[jm]; + if (zmeltmax > zepsec && zicetot > zepsec) { + // Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm] / zicetot; + zmelt = fmin(zqxfg[jm], zalfa*zmeltmax); + // needed in first guess + // This implies that zqpretot has to be recalculated below + // since is not conserved here if ice falls and liquid doesn't + zqxfg[jm] = zqxfg[jm] - zmelt; + zqxfg[-1 + jn] = zqxfg[-1 + jn] + zmelt; + zsolqa[-1 + jn + 5*jm] = zsolqa[-1 + jn + 5*jm] + zmelt; + zsolqa[jm + 5*(-1 + jn)] = zsolqa[jm + 5*(-1 + jn)] - zmelt; + } + } + } + + //---------------------------------------------------------------------- + // 4.4b FREEZING of RAIN + //---------------------------------------------------------------------- + + // If rain present + if (zqx[2] > zepsec) { + + if (ztp1[jk_i] <= rtt && ztp1[jk_im1] > rtt) { + // Base of melting layer/top of refreezing layer so + // store rain/snow fraction for precip type diagnosis + // If mostly rain, then supercooled rain slow to freeze + // otherwise faster to freeze (snow or ice pellets) + zqpretot = fmax(zqx[3] + zqx[2], zepsec); + prainfrac_toprfz[jl + klon*ibl] = zqx[2] / zqpretot; + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + llrainliq = true; + } else { + llrainliq = false; + } + } + + // If temperature less than zero + if (ztp1[jk_i] < rtt) { + + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + + // Majority of raindrops completely melted + // Refreezing is by slow heterogeneous freezing + + // Slope of rain particle size distribution + zlambda = + pow(((*yrecldp).rcl_fac1 / (zrho*zqx[2])), (*yrecldp).rcl_fac2); + + // Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = (*yrecldp).rcl_fzrab*(ztp1[jk_i] - rtt); + zfrz = ptsphy*((*yrecldp).rcl_const5r / zrho)*(exp(ztemp) - (double) 1.) + *(pow(zlambda, (*yrecldp).rcl_const6r)); + zfrzmax = fmax(zfrz, (double) 0.0); + + } else { + + // Majority of raindrops only partially melted + // Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*(rtt - ztp1[jk_i]) + ) / (*yrecldp).rtaumel); + zfrzmax = fmax((rtt - ztp1[jk_i])*zcons1*zrldcp, (double) 0.0); + + } + + if (zfrzmax > zepsec) { + zfrz = fmin(zqx[2], zfrzmax); + zsolqa[3 + 5*(2)] = zsolqa[3 + 5*(2)] + zfrz; + zsolqa[2 + 5*(3)] = zsolqa[2 + 5*(3)] - zfrz; + } + } + + } + + + //---------------------------------------------------------------------- + // 4.4c FREEZING of LIQUID + //---------------------------------------------------------------------- + // not implicit yet... + zfrzmax = fmax(((*yrecldp).rthomo - ztp1[jk_i])*zrldcp, (double) 0.0); + + jm = 1; + jn = imelt[-1 + jm]; + if (zfrzmax > zepsec && zqxfg[-1 + jm] > zepsec) { + zfrz = fmin(zqxfg[-1 + jm], zfrzmax); + zsolqa[-1 + jn + 5*(-1 + jm)] = zsolqa[-1 + jn + 5*(-1 + jm)] + zfrz; + zsolqa[-1 + jm + 5*(-1 + jn)] = zsolqa[-1 + jm + 5*(-1 + jn)] - zfrz; + } + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF RAIN/SNOW + //---------------------------------------------------------------------- + + //---------------------------------------- + // Rain evaporation scheme from Sundquist + //---------------------------------------- + if (ievaprain == 1) { + + // Rain + + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + zqe = (zqx[4] - za[jk_i]*zqsliq) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsliq)); + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq; + + if (llo1) { + // note: zpreclr is a rain flux + zpreclr = zqxfg[2]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + + 1)*ibl)]) / (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(double) 0.5*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsliq; + zdpr = zcovpclr*zbeta*(zqsliq - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Evaporate rain + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + + //--------------------------------------------------------- + // Rain evaporation scheme based on Abel and Boutle (2013) + //--------------------------------------------------------- + } else if (ievaprain == 2) { + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for rain evaporation + // to avoid cloud formation and saturation of the grid box + //----------------------------------------------------------------------- + // Limit RH for rain evaporation dependent on precipitation fraction + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + // Critical relative humidity + //ZRHC=RAMID + //ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + //IF(ZSIGK > 0.8_JPRB) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + //ENDIF + //ZZRH = MIN(ZRHC,ZZRH) + + // Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = fmin((double) 0.8, zzrh); + + zqe = fmax((double) 0.0, fmin(zqx[4], zqsliq)); + + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq; + + if (llo1) { + + //------------------------------------------- + // Abel and Boutle (2012) evaporation + //------------------------------------------- + // Calculate local precipitation (kg/kg) + zpreclr = zqxfg[2] / zcovptot; + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), 0.4); + + // Saturation vapour pressure with respect to liquid phase + zesatliq = rv / rd*((double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)))); + + // Slope of particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zpreclr)), (*yrecldp).rcl_fac2); // ZPRECLR=kg/kg + + zevap_denom = (*yrecldp).rcl_cdenom1*zesatliq - (*yrecldp) + .rcl_cdenom2*ztp1[jk_i]*zesatliq + (*yrecldp) + .rcl_cdenom3*(pow(ztp1[jk_i], (double) 3.))*pap[jl + klon*(jk + + klev*ibl)]; + + // Temperature dependent conductivity + zcorr2 = (pow((ztp1[jk_i] / (double) 273.), (double) 1.5))*(double) + 393. / (ztp1[jk_i] + (double) 120.); + zka = (*yrecldp).rcl_ka273*zcorr2; + + zsubsat = fmax(zzrh*zqsliq - zqe, (double) 0.0); + + zbeta = ((double) 0.5 / zqsliq)*(pow(ztp1[jk_i], (double) 2.)) + *zesatliq*(*yrecldp).rcl_const1r*(zcorr2 / zevap_denom)*((double) 0.78 / + (pow(zlambda, (*yrecldp).rcl_const4r)) + (*yrecldp) + .rcl_const2r*(pow((zrho*zfallcorr), (double) 0.5)) / ((pow(zcorr2, + (double) 0.5))*(pow(zlambda, (*yrecldp).rcl_const3r)))); + + zdenom = (double) 1.0 + zbeta*ptsphy; //*ZCORQSLIQ(JL) + zdpevap = zcovpclr*zbeta*ptsphy*zsubsat / zdenom; + + //--------------------------------------------------------- + // Add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Limit rain evaporation + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + } + // on IEVAPRAIN + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF SNOW + //---------------------------------------------------------------------- + // Snow + if (ievapsnow == 1) { + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice)); + llo1 = zcovpclr > zepsec && zqxfg[3] > zepsec && zqe < zzrh*zqsice; + + if (llo1) { + // note: zpreclr is a rain flux a + zpreclr = zqxfg[3]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + + 1)*ibl)]) / (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsice; + zdpr = zcovpclr*zbeta*(zqsice - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce snow to zero and model + // produces small amounts of snowfall everywhere. + //--------------------------------------------------------- + + // Evaporate snow + zevap = fmin(zdpevap, zqxfg[3]); + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqxfg[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + //--------------------------------------------------------- + } else if (ievapsnow == 2) { + + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for snow evaporation + //----------------------------------------------------------------------- + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice)); + llo1 = zcovpclr > zepsec && zqx[3] > zepsec && zqe < zzrh*zqsice; + + if (llo1) { + + // Calculate local precipitation (kg/kg) + zpreclr = zqx[3] / zcovptot; + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))*rv / rd; + + // Particle size distribution + // ZTCG increases Ni with colder temperatures - essentially a + // Fletcher or Meyers scheme? + ztcg = (double) 1.0; //v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + // ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = (double) 1.0; //v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk_i] + + pap[jl + klon*(jk + klev*ibl)]*(*yrecldp).rcl_apb3*(pow(ztp1[jk_i], 3)); + zcorrfac = pow((1.0 / zrho), 0.5); + zcorrfac2 = + (pow((ztp1[jk_i] / 273.0), 1.5))*(393.0 / (ztp1[jk_i] + 120.0)) + ; + + zpr02 = zrho*zpreclr*(*yrecldp).rcl_const1s / (ztcg*zfacx1s); + + zterm1 = (zqsice - zqe)*(pow(ztp1[jk_i], 2)) + *zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2s*zfacx1s / + (zrho*zaplusb*zqsice); + zterm2 = 0.65*(*yrecldp).rcl_const6s*(pow(zpr02, (*yrecldp).rcl_const4s)) + + (*yrecldp).rcl_const3s*(pow(zcorrfac, 0.5))*(pow(zrho, 0.5))*(pow(zpr02, + (*yrecldp).rcl_const5s)) / (pow(zcorrfac2, 0.5)); + + zdpevap = fmax(zcovpclr*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit evaporation to snow amount + //-------------------------------------------------------------------- + zevap = fmin(zdpevap, zevaplimice); + zevap = fmin(zevap, zqx[3]); + + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqx[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + + } + // on IEVAPSNOW + + //-------------------------------------- + // Evaporate small precipitation amounts + //-------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm]) { + if (zqxfg[jm] < (*yrecldp).rlmin) { + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zqxfg[jm]; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zqxfg[jm]; + } + } + } + + //###################################################################### + // 5.0 *** SOLVERS FOR A AND L *** + // now use an implicit solution rather than exact solution + // solver is forward in time, upstream difference for advection + //###################################################################### + + //--------------------------- + // 5.1 solver for cloud cover + //--------------------------- + zanew = (za[jk_i] + zsolac) / ((double) 1.0 + zsolab); + zanew = fmin(zanew, (double) 1.0); + if (zanew < (*yrecldp).ramin) { + zanew = (double) 0.0; + } + zda = zanew - zaorig; + //--------------------------------- + // variables needed for next level + //--------------------------------- + zanewm1 = zanew; + + //-------------------------------- + // 5.2 solver for the microphysics + //-------------------------------- + + //-------------------------------------------------------------- + // Truncate explicit sinks to avoid negatives + // Note: Species are treated in the order in which they run out + // since the clipping will alter the balance for the other vars + //-------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + llindex3[jn + 5*jm] = false; + } + zsinksum[jm] = (double) 0.0; + } + + //---------------------------- + // collect sink terms and mark + //---------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsinksum[jm] = zsinksum[jm] - zsolqa[jm + 5*jn]; // +ve total is bad + } + } + + //--------------------------------------- + // calculate overshoot and scaling factor + //--------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zmax = fmax(zqx[jm], zepsec); + zrat = fmax(zsinksum[jm], zmax); + zratio[jm] = zmax / zrat; + } + + //-------------------------------------------- + // scale the sink terms, in the correct order, + // recalculating the scale factor each time + //-------------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zsinksum[jm] = (double) 0.0; + } + + //---------------- + // recalculate sum + //---------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + psum_solqa = 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + psum_solqa = psum_solqa + zsolqa[jm + 5*jn]; + } + // ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm] = zsinksum[jm] - psum_solqa; + //--------------------------- + // recalculate scaling factor + //--------------------------- + zmm = fmax(zqx[jm], zepsec); + zrr = fmax(zsinksum[jm], zmm); + zratio[jm] = zmm / zrr; + //------ + // scale + //------ + zzratio = zratio[jm]; + //DIR$ IVDEP + //DIR$ PREFERVECTOR + for (jn = 0; jn <= 5 + -1; jn += 1) { + if (zsolqa[jm + 5*jn] < (double) 0.0) { + zsolqa[jm + 5*jn] = zsolqa[jm + 5*jn]*zzratio; + zsolqa[jn + 5*jm] = zsolqa[jn + 5*jm]*zzratio; + } + } + } + + //-------------------------------------------------------------- + // 5.2.2 Solver + //------------------------ + + //------------------------ + // set the LHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + //---------------------------------------------- + // diagonals: microphysical sink terms+transport + //---------------------------------------------- + if (jn + 1 == jm + 1) { + zqlhs[jn + 5*jm] = (double) 1.0 + zfallsink[jm]; + for (jo = 0; jo <= 5 + -1; jo += 1) { + zqlhs[jn + 5*jm] = zqlhs[jn + 5*jm] + zsolqb[jo + 5*jn]; + } + //------------------------------------------ + // non-diagonals: microphysical source terms + //------------------------------------------ + } else { + zqlhs[jn + 5*jm] = -zsolqb[jn + 5*jm]; // here is the delta T - missing from doc. + } + } + } + + //------------------------ + // set the RHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + //--------------------------------- + // sum the explicit source and sink + //--------------------------------- + zexplicit = (double) 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + zexplicit = zexplicit + zsolqa[jm + 5*jn]; // sum over middle index + } + zqxn[jm] = zqx[jm] + zexplicit; + } + + //----------------------------------- + // *** solve by LU decomposition: *** + //----------------------------------- + + // Note: This fast way of solving NCLVxNCLV system + // assumes a good behaviour (i.e. non-zero diagonal + // terms with comparable orders) of the matrix stored + // in ZQLHS. For the moment this is the case but + // be aware to preserve it when doing eventual + // modifications. + + // Non pivoting recursive factorization + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + // number of steps + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + // row index + zqlhs[jm + 5*jn] = zqlhs[jm + 5*jn] / zqlhs[jn + 5*jn]; + for (ik = jn + 1; ik <= 5 + -1; ik += 1) { + // column index + zqlhs[jm + 5*ik] = zqlhs[jm + 5*ik] - zqlhs[jm + 5*jn]*zqlhs[jn + 5*ik]; + } + } + } + + // Backsubstitution + // step 1 + for (jn = 1; jn <= 5 + -1; jn += 1) { + for (jm = 0; jm <= jn + 1 - 1 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + } + // step 2 + zqxn[4] = zqxn[4] / zqlhs[4 + 5*(4)]; + for (jn = -2 + 5; jn >= 1 + -1; jn += -1) { + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + zqxn[jn] = zqxn[jn] / zqlhs[jn + 5*jn]; + } + + // Ensure no small values (including negatives) remain in cloud variables nor + // precipitation rates. + // Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + if (zqxn[jn] < zepsec) { + zqxn[4] = zqxn[4] + zqxn[jn]; + zqxn[jn] = (double) 0.0; + } + } + + //-------------------------------- + // variables needed for next level + //-------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxnm1[jm] = zqxn[jm]; + zqxn2d[jm] = zqxn[jm]; + } + + //------------------------------------------------------------------------ + // 5.3 Precipitation/sedimentation fluxes to next level + // diagnostic precipitation fluxes + // It is this scaled flux that must be used for source to next layer + //------------------------------------------------------------------------ + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[jk_ip1 + 2*jm] = zfallsink[jm]*zqxn[jm]*zrdtgdp; + } + + // Ensure precipitation fraction is zero if no precipitation + zqpretot = + zpfplsx[jk_ip1 + 2*(3)] + zpfplsx[jk_ip1 + 2*(2)]; + if (zqpretot < zepsec) { + zcovptot = (double) 0.0; + } + + //###################################################################### + // 6 *** UPDATE TENDANCIES *** + //###################################################################### + + //-------------------------------- + // 6.1 Temperature and CLV budgets + //-------------------------------- + + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + + // calculate fluxes in and out of box for conservation of TL + zfluxq[jm] = zpsupsatsrce[jm] + zconvsrce[jm] + zfallsrce[jm] - + (zfallsink[jm] + zconvsink[jm])*zqxn[jm]; + + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + + klon*(jk + klev*ibl)] + ralvdcp*(zqxn[jm] - zqx[jm] - + zfluxq[jm])*zqtmst; + } + + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + + klon*(jk + klev*ibl)] + ralsdcp*(zqxn[jm] - zqx[jm] - + zfluxq[jm])*zqtmst; + } + + //---------------------------------------------------------------------- + // New prognostic tendencies - ice,liquid rain,snow + // Note: CLV arrays use PCLV in calculation of tendency while humidity + // uses ZQX. This is due to clipping at start of cloudsc which + // include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + //---------------------------------------------------------------------- + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] + + (zqxn[jm] - zqx0[jm])*zqtmst; + + } + + //---------------------- + // 6.2 Humidity budget + //---------------------- + tendency_loc_q[jl + klon*(jk + klev*ibl)] = tendency_loc_q[jl + + klon*(jk + klev*ibl)] + (zqxn[4] - zqx[4])*zqtmst; + + //------------------- + // 6.3 cloud cover + //----------------------- + tendency_loc_a[jl + klon*(jk + klev*ibl)] = + tendency_loc_a[jl + klon*(jk + klev*ibl)] + zda*zqtmst; + + //-------------------------------------------------- + // Copy precipitation fraction into output variable + //------------------------------------------------- + pcovptot[jl + klon*(jk + klev*ibl)] = zcovptot; + + } + } + + // on vertical level JK + //---------------------------------------------------------------------- + // END OF VERTICAL LOOP + //---------------------------------------------------------------------- + + //###################################################################### + // 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + //###################################################################### + + //-------------------------------------------------------------------- + // Copy general precip arrays back into PFP arrays for GRIB archiving + // Add rain and liquid fluxes, ice and snow fluxes + //-------------------------------------------------------------------- + pfplsl[jl + klon*(jk + (klev + 1)*ibl)] = + zpfplsx[jk_i + 2*(2)] + zpfplsx[jk_i + 2*(0)]; + pfplsn[jl + klon*(jk + (klev + 1)*ibl)] = + zpfplsx[jk_i + 2*(3)] + zpfplsx[jk_i + 2*(1)]; + + if (1 <= jk + 1 && jk + 1 <= klev) { + + zgdph_r = -zrg_r*(paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - paph[jl + klon*(jk + + (klev + 1)*ibl)])*zqtmst; + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqltur[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqitur[jl + klon*(jk + (klev + 1)*ibl)]; + + zalfaw = zfoealfa; + + // Liquid , LS scheme minus detrainment + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[0] - zqx0[0] + pvfl[jl + klon*(jk + klev*ibl) + ]*ptsphy - zalfaw*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // liquid, negative numbers + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[0]*zgdph_r; + + // liquid, vertical diffusion + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqltur[jl + klon*(1 + jk + + (klev + 1)*ibl)] + pvfl[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // Rain, LS scheme + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqrf[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[2] - zqx0[2])*zgdph_r; + // rain, negative numbers + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[2]*zgdph_r; + + // Ice , LS scheme minus detrainment + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[1] - zqx0[1] + pvfi[jl + klon*(jk + klev*ibl) + ]*ptsphy - ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // ice, negative numbers + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[1]*zgdph_r; + + // ice, vertical diffusion + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqitur[jl + klon*(1 + jk + + (klev + 1)*ibl)] + pvfi[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // snow, LS scheme + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqsf[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[3] - zqx0[3])*zgdph_r; + // snow, negative numbers + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[3]*zgdph_r; + } + + //----------------------------------- + // enthalpy flux due to precipitation + //----------------------------------- + pfhpsl[jl + klon*(jk + (klev + 1)*ibl)] = + -rlvtt*pfplsl[jl + klon*(jk + (klev + 1)*ibl)]; + pfhpsn[jl + klon*(jk + (klev + 1)*ibl)] = + -rlstt*pfplsn[jl + klon*(jk + (klev + 1)*ibl)]; + } +} + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.h b/src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.h new file mode 100644 index 00000000..e4d0364b --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_c_k_caching.h @@ -0,0 +1,44 @@ +#include "hip/hip_runtime.h" +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "hip/hip_runtime.h" +#include "yoecldp_c.h" +#include + +__global__ void __launch_bounds__(128, 1) cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2); + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp b/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp new file mode 100644 index 00000000..c03a877d --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp @@ -0,0 +1,607 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_driver.h" + +#include +#include "mycpu.h" + +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(hipError_t code, const char *file, int line, bool abort=true) +{ + if (code != hipSuccess) + { + fprintf(stderr,"GPUassert: %s %s %d\n", hipGetErrorString(code), file, line); + if (abort) exit(code); + } +} + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +void cloudsc_driver(int numthreads, int numcols, int nproma) { + + double *tend_tmp_u; + double *tend_tmp_v; + double *tend_tmp_t; + double *tend_tmp_q; + double *tend_tmp_o3; + double *tend_tmp_a; + double *tend_tmp_cld; + + double *tend_loc_u; + double *tend_loc_v; + double *tend_loc_t; + double *tend_loc_q; + double *tend_loc_o3; + double *tend_loc_a; + double *tend_loc_cld; + + double *tend_cml_u; + double *tend_cml_v; + double *tend_cml_t; + double *tend_cml_q; + double *tend_cml_o3; + double *tend_cml_a; + double *tend_cml_cld; + + double ptsphy; //! Physics timestep + + double *plcrit_aer; + double *picrit_aer; + double *pre_ice; + double *pccn; //! liquid cloud condensation nuclei + double *pnice; //! ice number concentration (cf. CCN) + double *pt; //! T at start of callpar + double *pq; //! Q at start of callpar + double *pvfa; //! CC from VDF scheme + double *pvfl; //! Liq from VDF scheme + double *pvfi; //! Ice from VDF scheme + double *pdyna; //! CC from Dynamics + double *pdynl; //! Liq from Dynamics + double *pdyni; //! Liq from Dynamics + double *phrsw; //! Short-wave heating rate + double *phrlw; //! Long-wave heating rate + double *pvervel; //! Vertical velocity + double *pap; //! Pressure on full levels + double *paph; //! Pressure on half levels + double *plsm; //! Land fraction (0-1) + int *ldcum; + int *ktype; //! Convection type 0,1,2 + double *plu; //! Conv. condensate + double *plude; //! Conv. detrained water + double *plude_tmp; + double *psnde; //! Conv. detrained snow + double *pmfu; //! Conv. mass flux up + double *pmfd; //! Conv. mass flux down + double *pa; //! Original Cloud fraction (t) + + double *pclv; + double *psupsat; + + double *pcovptot; //! Precip fraction + double *prainfrac_toprfz; + double *pfsqlf; //! Flux of liquid + double *pfsqif; //! Flux of ice + double *pfcqlng; //! -ve corr for liq + double *pfcqnng; //! -ve corr for ice + double *pfsqrf; //! Flux diagnostics + double *pfsqsf; //! for DDH, generic + double *pfcqrng; //! rain + double *pfcqsng; //! snow + double *pfsqltur; //! liquid flux due to VDF + double *pfsqitur; //! ice flux due to VDF + double *pfplsl; //! liq+rain sedim flux + double *pfplsn; //! ice+snow sedim flux + double *pfhpsl; //! Enthalpy flux for liq + double *pfhpsn; //! Enthalpy flux for ice + + /* Define or query data dimensions from input file */ + int klon, nlev; + int nblocks = (numcols / nproma) + min(numcols % nproma, 1); + + double zinfo[4][numthreads]; + const double zhpm = 12482329.0; // IBM P7 HPM flop count for 100 points at L137 + + int nclv; // number of microphysics variables + int ncldql; // liquid cloud water + int ncldqi; // ice cloud water + int ncldqr; // rain water + int ncldqs; // snow + int ncldqv; // vapour + + nclv = 5; // number of microphysics variables + ncldql = 1; // liquid cloud water + ncldqi = 2; // ice cloud water + ncldqr = 3; // rain water + ncldqs = 4; // snow + ncldqv = 5; // vapour + + struct TECLDP *yrecldp = (struct TECLDP*)malloc(sizeof(struct TECLDP)); + + query_state(&klon, &nlev); + + tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_cml_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_tmp_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + + plcrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + picrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pre_ice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pccn = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pnice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pt = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pq = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfi = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyna = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdynl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyni = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrsw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrlw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvervel = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pap = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + paph = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + plsm = (double*) malloc( sizeof(double) * nblocks*nproma ); + ldcum = (int*) malloc( sizeof(int) * nblocks*nproma ); + ktype = (int*) malloc( sizeof(int) * nblocks*nproma ); + plu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + psnde = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfd = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pclv = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + psupsat = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + + + double rg; + double rd; + double rcpd; + double retv; + double rlvtt; + double rlstt; + double rlmlt; + double rtt; + double rv; + double r2es; + double r3les; + double r3ies; + double r4les; + double r4ies; + double r5les; + double r5ies; + double r5alvcp; + double r5alscp; + double ralvdcp; + double ralsdcp; + double ralfdcp; + double rtwat; + double rtice; + double rticecu; + double rtwat_rtice_r; + double rtwat_rticecu_r; + double rkoop1; + double rkoop2; + + // device declarations + double *d_plcrit_aer; + double *d_picrit_aer; + double *d_pre_ice; + double *d_pccn; + double *d_pnice; + double *d_pt; + double *d_pq; + double *d_tend_loc_t; + double *d_tend_loc_q; + double *d_tend_loc_a; + double *d_tend_loc_cld; + double *d_tend_tmp_t; + double *d_tend_tmp_q; + double *d_tend_tmp_a; + double *d_tend_tmp_cld; + double *d_pvfa; + double *d_pvfl; + double *d_pvfi; + double *d_pdyna; + double *d_pdynl; + double *d_pdyni; + double *d_phrsw; + double *d_phrlw; + double *d_pvervel; + double *d_pap; + double *d_paph; + double *d_plsm; + int *d_ktype; + double *d_plu; + double *d_plude; + double *d_psnde; + double *d_pmfu; + double *d_pmfd; + double *d_pa; + double *d_pclv; + double *d_psupsat; + struct TECLDP *d_yrecldp; + double *d_pcovptot; + double *d_prainfrac_toprfz; + double *d_pfsqlf; + double *d_pfsqif; + double *d_pfcqnng; + double *d_pfcqlng; + double *d_pfsqrf; + double *d_pfsqsf; + double *d_pfcqrng; + double *d_pfcqsng; + double *d_pfsqltur; + double *d_pfsqitur; + double *d_pfplsl; + double *d_pfplsn; + double *d_pfhpsl; + double *d_pfhpsn; + // end device declarations + + hipMalloc(&d_plcrit_aer, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_picrit_aer, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pre_ice, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pccn, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pnice, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pt, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pq, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + hipMalloc(&d_tend_tmp_t, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_tmp_q, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_tmp_a, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + hipMalloc(&d_pvfa, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pvfl, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pvfi, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pdyna, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pdynl, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pdyni, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_phrsw, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_phrlw, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pvervel, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pap, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_paph, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_plsm, sizeof(double) * nblocks*nproma); + hipMalloc(&d_ktype, sizeof(int) * nblocks*nproma); + hipMalloc(&d_plu, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_plude, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_psnde, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pmfu, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pmfd, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pa, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pclv, sizeof(double) * nblocks*nlev*nproma*nclv); + hipMalloc(&d_psupsat, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_yrecldp, sizeof(struct TECLDP)); + hipMalloc(&d_pcovptot, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_prainfrac_toprfz, sizeof(double) * nblocks*nproma); + hipMalloc(&d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma); + + load_state(klon, nlev, nclv, numcols, nproma, &ptsphy, plcrit_aer, picrit_aer, + pre_ice, pccn, pnice, pt, pq, + tend_cml_t, tend_cml_q, tend_cml_a, tend_cml_cld, + tend_tmp_t, tend_tmp_q, tend_tmp_a, tend_tmp_cld, + pvfa, pvfl, pvfi, pdyna, pdynl, pdyni, + phrsw, phrlw, pvervel, pap, paph, plsm, ktype, plu, + plude, psnde, pmfu, pmfd, pa, pclv, psupsat, yrecldp, + &rg, &rd, &rcpd, &retv, &rlvtt, &rlstt, + &rlmlt, &rtt, &rv, &r2es, &r3les, &r3ies, + &r4les, &r4ies, &r5les, &r5ies, &r5alvcp, &r5alscp, + &ralvdcp, &ralsdcp, &ralfdcp, &rtwat, + &rtice, &rticecu, &rtwat_rtice_r, &rtwat_rticecu_r, + &rkoop1, &rkoop2 ); + + // host to device + hipMemcpy(d_plcrit_aer, plcrit_aer, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_picrit_aer, picrit_aer, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pre_ice, pre_ice, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pccn, pccn, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pnice, pnice, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pt, pt, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pq, pq, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_t, tend_loc_t, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_q, tend_loc_q, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_a, tend_loc_a, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_cld, tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_t, tend_tmp_t, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_q, tend_tmp_q, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_a, tend_tmp_a, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_cld, tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyHostToDevice); + hipMemcpy(d_pvfa, pvfa, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pvfl, pvfl, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pvfi, pvfi, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pdyna, pdyna, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pdynl, pdynl, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pdyni, pdyni, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_phrsw, phrsw, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_phrlw, phrlw, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pvervel, pvervel, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pap, pap, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_paph, paph, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_plsm, plsm, sizeof(double) * nblocks*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_ktype, ktype, sizeof(int) * nblocks*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_plu, plu, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_plude, plude, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_psnde, psnde, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pmfu, pmfu, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pmfd, pmfd, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pa, pa, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pclv, pclv, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyHostToDevice); + hipMemcpy(d_psupsat, psupsat, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_yrecldp, yrecldp, sizeof(TECLDP), hipMemcpyHostToDevice); + // end host to device + + double t1 = omp_get_wtime(); + + int b, bsize, icalls=0, igpc=numcols; + int coreid = mycpu(); + int tid = omp_get_thread_num(); + double start = omp_get_wtime(); + + dim3 blockdim(nproma, 1, 1); + //dim3 griddim(1, 1, ceil(((double)numcols) / ((double)nproma))); + dim3 griddim(ceil(((double)numcols) / ((double)nproma)), 1, 1); + int jkglo = 0; + int ibl = (jkglo - 1) / nproma + 1; + int icend = min(nproma, numcols - jkglo + 1); + + cloudsc_c<<>>(1, icend/*bsize*/, nproma/*, nlev*/, ptsphy, d_pt, d_pq, + d_tend_tmp_t, d_tend_tmp_q, d_tend_tmp_a, d_tend_tmp_cld, + d_tend_loc_t, d_tend_loc_q, d_tend_loc_a, d_tend_loc_cld, + d_pvfa, d_pvfl, d_pvfi, + d_pdyna, d_pdynl, d_pdyni, + d_phrsw, d_phrlw, d_pvervel, + d_pap, d_paph, d_plsm, d_ktype, + d_plu, d_plude, d_psnde, d_pmfu, d_pmfd, + d_pa, d_pclv, d_psupsat, + d_plcrit_aer, d_picrit_aer, d_pre_ice, d_pccn, d_pnice, + d_pcovptot, d_prainfrac_toprfz, d_pfsqlf, + d_pfsqif, d_pfcqnng, d_pfcqlng, + d_pfsqrf, d_pfsqsf, d_pfcqrng, + d_pfcqsng, d_pfsqltur, d_pfsqitur, + d_pfplsl, d_pfplsn, d_pfhpsl, d_pfhpsn, d_yrecldp, + nblocks, rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, + rv, r2es, r3les, r3ies, r4les, r4ies, r5les, + r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, + rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, + rkoop1, rkoop2); + + + gpuErrchk( hipPeekAtLastError() ); + gpuErrchk( hipDeviceSynchronize() ); + + double end = omp_get_wtime(); + + // device to host + hipMemcpy(tend_loc_t, d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(tend_loc_q, d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(tend_loc_a, d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(tend_loc_cld, d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyDeviceToHost); + hipMemcpy(phrlw, d_phrlw, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(plude, d_plude, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(yrecldp, d_yrecldp, sizeof(TECLDP), hipMemcpyDeviceToHost); + hipMemcpy(pcovptot, d_pcovptot, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(prainfrac_toprfz, d_prainfrac_toprfz, sizeof(double) * nblocks*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqlf, d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqif, d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqnng, d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqlng, d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqrf, d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqsf, d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqrng, d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqsng, d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqltur, d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqitur, d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfplsl, d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfplsn, d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfhpsl, d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfhpsn, d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + // end device to host + + /* int msec = diff * 1000 / CLOCKS_PER_SEC; */ + zinfo[0][tid] = end - start; + zinfo[1][tid] = (double) coreid; + zinfo[2][tid] = (double) icalls; + zinfo[3][tid] = (double) igpc; + + double t2 = omp_get_wtime(); + + printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); + double zfrac, zmflops; + for (int t = 0; t < numthreads; t++) { + const double tloc = zinfo[0][t]; + const int coreid = (int) zinfo[1][t]; + const int icalls = (int) zinfo[2][t]; + const int igpc = (int) zinfo[3][t]; + zfrac = (double)igpc / (double)numcols; + if (tloc > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + } else { + zmflops = 0.; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + } + double tdiff = t2 - t1; + zfrac = 1.0; + if (tdiff > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + } else { + zmflops = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + + cloudsc_validate(klon, nlev, nclv, numcols, nproma, + plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, + pfcqlng, pfcqnng, pfsqrf, pfsqsf, pfcqrng, pfcqsng, + pfsqltur, pfsqitur, pfplsl, pfplsn, pfhpsl, pfhpsn, + tend_loc_a, tend_loc_q, tend_loc_t, tend_loc_cld); + + free(plcrit_aer); // ALLOCATE(PLCRIT_AER(KLON,KLEV)) + free(picrit_aer); // ALLOCATE(PICRIT_AER(KLON,KLEV)) + free(pre_ice); // ALLOCATE(PRE_ICE(KLON,KLEV)) + free(pccn); + free(pnice); + free(pt); + free(pq); + free(pvfa); + free(pvfl); + free(pvfi); + free(pdyna); + free(pdynl); + free(pdyni); + free(phrsw); + free(phrlw); + free(pvervel); + free(pap); + free(paph); // ALLOCATE(PAPH(KLON,KLEV+1)) + free(plsm); + free(ktype); + free(plu); + free(plude); + free(psnde); + free(pmfu); + free(pmfd); + free(pa); + free(pclv); + free(psupsat); + free(pcovptot); + free(tend_loc_t); + free(tend_loc_q); + free(tend_loc_a); + free(tend_loc_cld); + free(tend_tmp_t); + free(tend_tmp_q); + free(tend_tmp_a); + free(tend_tmp_cld); + free(tend_cml_t); + free(tend_cml_q); + free(tend_cml_a); + free(tend_cml_cld); + free(prainfrac_toprfz); + free(pfsqlf); + free(pfsqif); + free(pfcqnng); + free(pfcqlng); + free(pfsqrf); + free(pfsqsf); + free(pfcqrng); + free(pfcqsng); + free(pfsqltur); + free(pfsqitur); + free(pfplsl); + free(pfplsn); + free(pfhpsl); + free(pfhpsn); + free(yrecldp); + + // free device + hipFree(d_plcrit_aer); + hipFree(d_picrit_aer); + hipFree(d_pre_ice); + hipFree(d_pccn); + hipFree(d_pnice); + hipFree(d_pt); + hipFree(d_pq); + hipFree(d_tend_loc_t); + hipFree(d_tend_loc_q); + hipFree(d_tend_loc_a); + hipFree(d_tend_loc_cld); + hipFree(d_tend_tmp_t); + hipFree(d_tend_tmp_q); + hipFree(d_tend_tmp_a); + hipFree(d_tend_tmp_cld); + hipFree(d_pvfa); + hipFree(d_pvfl); + hipFree(d_pvfi); + hipFree(d_pdyna); + hipFree(d_pdynl); + hipFree(d_pdyni); + hipFree(d_phrsw); + hipFree(d_phrlw); + hipFree(d_pvervel); + hipFree(d_pap); + hipFree(d_paph); + hipFree(d_plsm); + hipFree(d_ktype); + hipFree(d_plu); + hipFree(d_plude); + hipFree(d_psnde); + hipFree(d_pmfu); + hipFree(d_pmfd); + hipFree(d_pa); + hipFree(d_pclv); + hipFree(d_psupsat); + hipFree(d_yrecldp); + hipFree(d_pcovptot); + hipFree(d_prainfrac_toprfz); + hipFree(d_pfsqlf); + hipFree(d_pfsqif); + hipFree(d_pfcqnng); + hipFree(d_pfcqlng); + hipFree(d_pfsqrf); + hipFree(d_pfsqsf); + hipFree(d_pfcqrng); + hipFree(d_pfcqsng); + hipFree(d_pfsqltur); + hipFree(d_pfsqitur); + hipFree(d_pfplsl); + hipFree(d_pfplsn); + hipFree(d_pfhpsl); + hipFree(d_pfhpsn); + // end free device +} + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver.h b/src/cloudsc_hip/cloudsc/cloudsc_driver.h new file mode 100644 index 00000000..6892a4cc --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver.h @@ -0,0 +1,23 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include +#include +#include "hip/hip_runtime_api.h" +#include "hip/hip_runtime.h" + +#include "yoecldp_c.h" +#include "load_state.h" +#include "cloudsc_c.h" +#include "cloudsc_validate.h" + +void cloudsc_driver(int numthreads, int numcols, int nproma); diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp new file mode 100644 index 00000000..657bed97 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp @@ -0,0 +1,666 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_driver_hoist.h" + +#include +#include "mycpu.h" + +#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } +inline void gpuAssert(hipError_t code, const char *file, int line, bool abort=true) +{ + if (code != hipSuccess) + { + fprintf(stderr,"GPUassert: %s %s %d\n", hipGetErrorString(code), file, line); + if (abort) exit(code); + } +} + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +void cloudsc_driver(int numthreads, int numcols, int nproma) { + + double *tend_tmp_u; + double *tend_tmp_v; + double *tend_tmp_t; + double *tend_tmp_q; + double *tend_tmp_o3; + double *tend_tmp_a; + double *tend_tmp_cld; + + double *tend_loc_u; + double *tend_loc_v; + double *tend_loc_t; + double *tend_loc_q; + double *tend_loc_o3; + double *tend_loc_a; + double *tend_loc_cld; + + double *tend_cml_u; + double *tend_cml_v; + double *tend_cml_t; + double *tend_cml_q; + double *tend_cml_o3; + double *tend_cml_a; + double *tend_cml_cld; + + double ptsphy; //! Physics timestep + + double *plcrit_aer; + double *picrit_aer; + double *pre_ice; + double *pccn; //! liquid cloud condensation nuclei + double *pnice; //! ice number concentration (cf. CCN) + double *pt; //! T at start of callpar + double *pq; //! Q at start of callpar + double *pvfa; //! CC from VDF scheme + double *pvfl; //! Liq from VDF scheme + double *pvfi; //! Ice from VDF scheme + double *pdyna; //! CC from Dynamics + double *pdynl; //! Liq from Dynamics + double *pdyni; //! Liq from Dynamics + double *phrsw; //! Short-wave heating rate + double *phrlw; //! Long-wave heating rate + double *pvervel; //! Vertical velocity + double *pap; //! Pressure on full levels + double *paph; //! Pressure on half levels + double *plsm; //! Land fraction (0-1) + int *ldcum; + int *ktype; //! Convection type 0,1,2 + double *plu; //! Conv. condensate + double *plude; //! Conv. detrained water + double *plude_tmp; + double *psnde; //! Conv. detrained snow + double *pmfu; //! Conv. mass flux up + double *pmfd; //! Conv. mass flux down + double *pa; //! Original Cloud fraction (t) + + double *pclv; + double *psupsat; + + double *pcovptot; //! Precip fraction + double *prainfrac_toprfz; + double *pfsqlf; //! Flux of liquid + double *pfsqif; //! Flux of ice + double *pfcqlng; //! -ve corr for liq + double *pfcqnng; //! -ve corr for ice + double *pfsqrf; //! Flux diagnostics + double *pfsqsf; //! for DDH, generic + double *pfcqrng; //! rain + double *pfcqsng; //! snow + double *pfsqltur; //! liquid flux due to VDF + double *pfsqitur; //! ice flux due to VDF + double *pfplsl; //! liq+rain sedim flux + double *pfplsn; //! ice+snow sedim flux + double *pfhpsl; //! Enthalpy flux for liq + double *pfhpsn; //! Enthalpy flux for ice + + /* Define or query data dimensions from input file */ + int klon, nlev; + int nblocks = (numcols / nproma) + min(numcols % nproma, 1); + + double zinfo[4][numthreads]; + const double zhpm = 12482329.0; // IBM P7 HPM flop count for 100 points at L137 + + int nclv; // number of microphysics variables + int ncldql; // liquid cloud water + int ncldqi; // ice cloud water + int ncldqr; // rain water + int ncldqs; // snow + int ncldqv; // vapour + + nclv = 5; // number of microphysics variables + ncldql = 1; // liquid cloud water + ncldqi = 2; // ice cloud water + ncldqr = 3; // rain water + ncldqs = 4; // snow + ncldqv = 5; // vapour + + struct TECLDP *yrecldp = (struct TECLDP*)malloc(sizeof(struct TECLDP)); + + query_state(&klon, &nlev); + + tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_cml_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_tmp_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + plcrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + picrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pre_ice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pccn = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pnice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pt = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pq = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfi = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyna = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdynl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyni = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrsw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrlw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvervel = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pap = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + paph = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + plsm = (double*) malloc( sizeof(double) * nblocks*nproma ); + ldcum = (int*) malloc( sizeof(int) * nblocks*nproma ); + ktype = (int*) malloc( sizeof(int) * nblocks*nproma ); + plu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + psnde = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfd = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pclv = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + psupsat = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + + + double rg; + double rd; + double rcpd; + double retv; + double rlvtt; + double rlstt; + double rlmlt; + double rtt; + double rv; + double r2es; + double r3les; + double r3ies; + double r4les; + double r4ies; + double r5les; + double r5ies; + double r5alvcp; + double r5alscp; + double ralvdcp; + double ralsdcp; + double ralfdcp; + double rtwat; + double rtice; + double rticecu; + double rtwat_rtice_r; + double rtwat_rticecu_r; + double rkoop1; + double rkoop2; + + // device declarations + double *d_plcrit_aer; + double *d_picrit_aer; + double *d_pre_ice; + double *d_pccn; + double *d_pnice; + double *d_pt; + double *d_pq; + double *d_tend_loc_t; + double *d_tend_loc_q; + double *d_tend_loc_a; + double *d_tend_loc_cld; + double *d_tend_tmp_t; + double *d_tend_tmp_q; + double *d_tend_tmp_a; + double *d_tend_tmp_cld; + double *d_pvfa; + double *d_pvfl; + double *d_pvfi; + double *d_pdyna; + double *d_pdynl; + double *d_pdyni; + double *d_phrsw; + double *d_phrlw; + double *d_pvervel; + double *d_pap; + double *d_paph; + double *d_plsm; + int *d_ktype; + double *d_plu; + double *d_plude; + double *d_psnde; + double *d_pmfu; + double *d_pmfd; + double *d_pa; + double *d_pclv; + double *d_psupsat; + struct TECLDP *d_yrecldp; + double *d_pcovptot; + double *d_prainfrac_toprfz; + double *d_pfsqlf; + double *d_pfsqif; + double *d_pfcqnng; + double *d_pfcqlng; + double *d_pfsqrf; + double *d_pfsqsf; + double *d_pfcqrng; + double *d_pfcqsng; + double *d_pfsqltur; + double *d_pfsqitur; + double *d_pfplsl; + double *d_pfplsn; + double *d_pfhpsl; + double *d_pfhpsn; + double *d_zfoealfa; + double *d_ztp1; + double *d_zli; + double *d_za; + double *d_zaorig; + double *d_zliqfrac; + double *d_zicefrac; + double *d_zqx; + double *d_zqx0; + double *d_zpfplsx; + double *d_zlneg; + double *d_zqxn2d; + double *d_zqsmix; + double *d_zqsliq; + double *d_zqsice; + double *d_zfoeewmt; + double *d_zfoeew; + double *d_zfoeeliqt; + // end device declarations + + // + hipMalloc(&d_plcrit_aer, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_picrit_aer, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pre_ice, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pccn, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pnice, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pt, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pq, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + hipMalloc(&d_tend_tmp_t, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_tmp_q, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_tmp_a, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + hipMalloc(&d_pvfa, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pvfl, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pvfi, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pdyna, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pdynl, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pdyni, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_phrsw, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_phrlw, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pvervel, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pap, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_paph, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_plsm, sizeof(double) * nblocks*nproma); + hipMalloc(&d_ktype, sizeof(int) * nblocks*nproma); + hipMalloc(&d_plu, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_plude, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_psnde, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pmfu, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pmfd, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pa, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_pclv, sizeof(double) * nblocks*nlev*nproma*nclv); + hipMalloc(&d_psupsat, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_yrecldp, sizeof(struct TECLDP)); + hipMalloc(&d_pcovptot, sizeof(double) * nblocks*nlev*nproma); + hipMalloc(&d_prainfrac_toprfz, sizeof(double) * nblocks*nproma); + hipMalloc(&d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zfoealfa, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_ztp1, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zli, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_za, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zaorig, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zliqfrac, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zicefrac, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zqx, sizeof(double) * nblocks*(nlev+1)*nproma*nclv); + hipMalloc(&d_zqx0, sizeof(double) * nblocks*(nlev+1)*nproma*nclv); + hipMalloc(&d_zpfplsx, sizeof(double) * nblocks*(nlev+1)*nproma*nclv); + hipMalloc(&d_zlneg, sizeof(double) * nblocks*(nlev+1)*nproma*nclv); + hipMalloc(&d_zqxn2d, sizeof(double) * nblocks*(nlev+1)*nproma*nclv); + hipMalloc(&d_zqsmix, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zqsliq, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zqsice, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zfoeewmt, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zfoeew, sizeof(double) * nblocks*(nlev+1)*nproma); + hipMalloc(&d_zfoeeliqt, sizeof(double) * nblocks*(nlev+1)*nproma); + // + + load_state(klon, nlev, nclv, numcols, nproma, &ptsphy, plcrit_aer, picrit_aer, + pre_ice, pccn, pnice, pt, pq, + tend_cml_t, tend_cml_q, tend_cml_a, tend_cml_cld, + tend_tmp_t, tend_tmp_q, tend_tmp_a, tend_tmp_cld, + pvfa, pvfl, pvfi, pdyna, pdynl, pdyni, + phrsw, phrlw, pvervel, pap, paph, plsm, ktype, plu, + plude, psnde, pmfu, pmfd, pa, pclv, psupsat, yrecldp, + &rg, &rd, &rcpd, &retv, &rlvtt, &rlstt, + &rlmlt, &rtt, &rv, &r2es, &r3les, &r3ies, + &r4les, &r4ies, &r5les, &r5ies, &r5alvcp, &r5alscp, + &ralvdcp, &ralsdcp, &ralfdcp, &rtwat, + &rtice, &rticecu, &rtwat_rtice_r, &rtwat_rticecu_r, + &rkoop1, &rkoop2 ); + + // host to device + hipMemcpy(d_plcrit_aer, plcrit_aer, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_picrit_aer, picrit_aer, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pre_ice, pre_ice, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pccn, pccn, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pnice, pnice, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pt, pt, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pq, pq, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_t, tend_loc_t, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_q, tend_loc_q, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_a, tend_loc_a, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_loc_cld, tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_t, tend_tmp_t, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_q, tend_tmp_q, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_a, tend_tmp_a, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_tend_tmp_cld, tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyHostToDevice); + hipMemcpy(d_pvfa, pvfa, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pvfl, pvfl, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pvfi, pvfi, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pdyna, pdyna, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pdynl, pdynl, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pdyni, pdyni, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_phrsw, phrsw, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_phrlw, phrlw, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pvervel, pvervel, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pap, pap, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_paph, paph, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_plsm, plsm, sizeof(double) * nblocks*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_ktype, ktype, sizeof(int) * nblocks*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_plu, plu, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_plude, plude, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_psnde, psnde, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pmfu, pmfu, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pmfd, pmfd, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pa, pa, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_pclv, pclv, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyHostToDevice); + hipMemcpy(d_psupsat, psupsat, sizeof(double) * nblocks*nlev*nproma, hipMemcpyHostToDevice); + hipMemcpy(d_yrecldp, yrecldp, sizeof(TECLDP), hipMemcpyHostToDevice); + // end host to device + + double t1 = omp_get_wtime(); + + int b, bsize, icalls=0, igpc=numcols; + int coreid = mycpu(); + int tid = omp_get_thread_num(); + double start = omp_get_wtime(); + + dim3 blockdim(nproma, 1, 1); + //dim3 griddim(1, 1, ceil(((double)numcols) / ((double)nproma))); + dim3 griddim(ceil(((double)numcols) / ((double)nproma)), 1, 1); + int jkglo = 0; + int ibl = (jkglo - 1) / nproma + 1; + int icend = min(nproma, numcols - jkglo + 1); + + cloudsc_c<<>>(1, icend/*bsize*/, nproma/*, nlev*/, ptsphy, d_pt, d_pq, + d_tend_tmp_t, d_tend_tmp_q, d_tend_tmp_a, d_tend_tmp_cld, + d_tend_loc_t, d_tend_loc_q, d_tend_loc_a, d_tend_loc_cld, + d_pvfa, d_pvfl, d_pvfi, + d_pdyna, d_pdynl, d_pdyni, + d_phrsw, d_phrlw, d_pvervel, + d_pap, d_paph, d_plsm, d_ktype, + d_plu, d_plude, d_psnde, d_pmfu, d_pmfd, + d_pa, d_pclv, d_psupsat, + d_plcrit_aer, d_picrit_aer, d_pre_ice, d_pccn, d_pnice, + d_pcovptot, d_prainfrac_toprfz, d_pfsqlf, + d_pfsqif, d_pfcqnng, d_pfcqlng, + d_pfsqrf, d_pfsqsf, d_pfcqrng, + d_pfcqsng, d_pfsqltur, d_pfsqitur, + d_pfplsl, d_pfplsn, d_pfhpsl, d_pfhpsn, d_yrecldp, + nblocks, rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, + rv, r2es, r3les, r3ies, r4les, r4ies, r5les, + r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, + rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, + rkoop1, rkoop2, + d_zfoealfa, d_ztp1, d_zli, + d_za, d_zaorig, d_zliqfrac, + d_zicefrac, d_zqx, d_zqx0, + d_zpfplsx, d_zlneg, d_zqxn2d, + d_zqsmix, d_zqsliq, d_zqsice, + d_zfoeewmt, d_zfoeew, d_zfoeeliqt); + + gpuErrchk( hipPeekAtLastError() ); + gpuErrchk( hipDeviceSynchronize() ); + + double end = omp_get_wtime(); + + // device to host + hipMemcpy(tend_loc_t, d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(tend_loc_q, d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(tend_loc_a, d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(tend_loc_cld, d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv, hipMemcpyDeviceToHost); + hipMemcpy(phrlw, d_phrlw, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(plude, d_plude, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(yrecldp, d_yrecldp, sizeof(TECLDP), hipMemcpyDeviceToHost); + hipMemcpy(pcovptot, d_pcovptot, sizeof(double) * nblocks*nlev*nproma, hipMemcpyDeviceToHost); + hipMemcpy(prainfrac_toprfz, d_prainfrac_toprfz, sizeof(double) * nblocks*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqlf, d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqif, d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqnng, d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqlng, d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqrf, d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqsf, d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqrng, d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfcqsng, d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqltur, d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfsqitur, d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfplsl, d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfplsn, d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfhpsl, d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + hipMemcpy(pfhpsn, d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma, hipMemcpyDeviceToHost); + + /* int msec = diff * 1000 / CLOCKS_PER_SEC; */ + zinfo[0][tid] = end - start; + zinfo[1][tid] = (double) coreid; + zinfo[2][tid] = (double) icalls; + zinfo[3][tid] = (double) igpc; + + double t2 = omp_get_wtime(); + + printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); + double zfrac, zmflops; + for (int t = 0; t < numthreads; t++) { + const double tloc = zinfo[0][t]; + const int coreid = (int) zinfo[1][t]; + const int icalls = (int) zinfo[2][t]; + const int igpc = (int) zinfo[3][t]; + zfrac = (double)igpc / (double)numcols; + if (tloc > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + } else { + zmflops = 0.; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + } + double tdiff = t2 - t1; + zfrac = 1.0; + if (tdiff > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + } else { + zmflops = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + + cloudsc_validate(klon, nlev, nclv, numcols, nproma, + plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, + pfcqlng, pfcqnng, pfsqrf, pfsqsf, pfcqrng, pfcqsng, + pfsqltur, pfsqitur, pfplsl, pfplsn, pfhpsl, pfhpsn, + tend_loc_a, tend_loc_q, tend_loc_t, tend_loc_cld); + + free(plcrit_aer); + free(picrit_aer); + free(pre_ice); + free(pccn); + free(pnice); + free(pt); + free(pq); + free(pvfa); + free(pvfl); + free(pvfi); + free(pdyna); + free(pdynl); + free(pdyni); + free(phrsw); + free(phrlw); + free(pvervel); + free(pap); + free(paph); + free(plsm); + free(ktype); + free(plu); + free(plude); + free(psnde); + free(pmfu); + free(pmfd); + free(pa); + free(pclv); + free(psupsat); + free(pcovptot); + free(tend_loc_t); + free(tend_loc_q); + free(tend_loc_a); + free(tend_loc_cld); + free(tend_tmp_t); + free(tend_tmp_q); + free(tend_tmp_a); + free(tend_tmp_cld); + free(tend_cml_t); + free(tend_cml_q); + free(tend_cml_a); + free(tend_cml_cld); + free(prainfrac_toprfz); + free(pfsqlf); + free(pfsqif); + free(pfcqnng); + free(pfcqlng); + free(pfsqrf); + free(pfsqsf); + free(pfcqrng); + free(pfcqsng); + free(pfsqltur); + free(pfsqitur); + free(pfplsl); + free(pfplsn); + free(pfhpsl); + free(pfhpsn); + free(yrecldp); + + // free device + hipFree(d_plcrit_aer); + hipFree(d_picrit_aer); + hipFree(d_pre_ice); + hipFree(d_pccn); + hipFree(d_pnice); + hipFree(d_pt); + hipFree(d_pq); + hipFree(d_tend_loc_t); + hipFree(d_tend_loc_q); + hipFree(d_tend_loc_a); + hipFree(d_tend_loc_cld); + hipFree(d_tend_tmp_t); + hipFree(d_tend_tmp_q); + hipFree(d_tend_tmp_a); + hipFree(d_tend_tmp_cld); + hipFree(d_pvfa); + hipFree(d_pvfl); + hipFree(d_pvfi); + hipFree(d_pdyna); + hipFree(d_pdynl); + hipFree(d_pdyni); + hipFree(d_phrsw); + hipFree(d_phrlw); + hipFree(d_pvervel); + hipFree(d_pap); + hipFree(d_paph); + hipFree(d_plsm); + hipFree(d_ktype); + hipFree(d_plu); + hipFree(d_plude); + hipFree(d_psnde); + hipFree(d_pmfu); + hipFree(d_pmfd); + hipFree(d_pa); + hipFree(d_pclv); + hipFree(d_psupsat); + hipFree(d_yrecldp); + hipFree(d_pcovptot); + hipFree(d_prainfrac_toprfz); + hipFree(d_pfsqlf); + hipFree(d_pfsqif); + hipFree(d_pfcqnng); + hipFree(d_pfcqlng); + hipFree(d_pfsqrf); + hipFree(d_pfsqsf); + hipFree(d_pfcqrng); + hipFree(d_pfcqsng); + hipFree(d_pfsqltur); + hipFree(d_pfsqitur); + hipFree(d_pfplsl); + hipFree(d_pfplsn); + hipFree(d_pfhpsl); + hipFree(d_pfhpsn); + hipFree(d_zfoealfa); + hipFree(d_ztp1); + hipFree(d_zli); + hipFree(d_za); + hipFree(d_zaorig); + hipFree(d_zliqfrac); + hipFree(d_zicefrac); + hipFree(d_zqx); + hipFree(d_zqx0); + hipFree(d_zpfplsx); + hipFree(d_zlneg); + hipFree(d_zqxn2d); + hipFree(d_zqsmix); + hipFree(d_zqsliq); + hipFree(d_zqsice); + hipFree(d_zfoeewmt); + hipFree(d_zfoeew); + hipFree(d_zfoeeliqt); + // end free device +} + diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.h b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.h new file mode 100644 index 00000000..aca23ab7 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.h @@ -0,0 +1,23 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include "hip/hip_runtime.h" + +#include "yoecldp_c.h" +#include "load_state.h" +#include "cloudsc_c_hoist.h" +#include "cloudsc_validate.h" + +void cloudsc_driver(int numthreads, int numcols, int nproma); diff --git a/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp b/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp new file mode 100644 index 00000000..79566e43 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp @@ -0,0 +1,244 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_validate.h" + +#include +#include + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + + +void print_error(const char *name, double zminval, double zmaxval, double zmaxerr, + double zerrsum, double zsum, double zavgpgp, int ndim) +{ + double zrelerr, zeps = DBL_EPSILON; + int iopt = 0; + if (zerrsum < zeps) { + zrelerr = 0.0; + iopt = 1; + } else if (zsum < zeps) { + zrelerr = zerrsum / (1.0 + zsum); + iopt = 2; + } else { + zrelerr = zerrsum / zsum; + iopt = 3; + } + + //-- If you get 4 exclamation marks next to your error output, + // then it is likely that some uninitialized variables exists or + // some other screw-up -- watch out this !!!! + //char *clwarn; + const char* clwarn = (zrelerr > 10.0 * zeps) ? " !!!!" : " "; + zrelerr = 100.0 * zrelerr; + + printf(" %+20s %dD%d %20.13le %20.13le %20.13le %20.13le %20.13le %s\n", + name, ndim, iopt, zminval, zmaxval, zmaxerr, zavgpgp, zrelerr, clwarn); +} + + +void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, int ngptot, int nblocks) +{ + /* Computes and prints errors in the "L2 norm sense" */ + int b, bsize, jk; + double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; + //double (*field)[nlon] = (double (*)[nlon]) v_field; + //double (*reference)[nlon] = (double (*)[nlon]) v_ref; + + zminval = +DBL_MAX; + zmaxval = -DBL_MAX; + zmaxerr = 0.0; + zerrsum = 0.0; + zsum = 0.0; + +// #pragma omp parallel for default(shared) private(b, bsize, jk) \ +// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + for (b = 0; b < nblocks; b++) { + bsize = min(nlon, ngptot - b*nlon); // field block size + for (jk = 0; jk < bsize; jk++) { + zminval = fmin(zminval, v_field[b*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nlon+jk]); + + // Difference against reference result in one-norm sense + zdiff = fabs(v_field[b*nlon+jk] - v_ref[b*nlon+jk]); + zmaxerr = fmax(zmaxerr, zdiff); + zerrsum = zerrsum + zdiff; + zsum = zsum + abs(v_ref[b*nlon+jk]); + } + } + zavgpgp = zerrsum / (double) ngptot; + print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); +} + + +void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int nlev, int ngptot, int nblocks) +{ + /* Computes and prints errors in the "L2 norm sense" */ + int b, bsize, jl, jk; + double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; +// double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field; +// double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref; + + zminval = +DBL_MAX; + zmaxval = -DBL_MAX; + zmaxerr = 0.0; + zerrsum = 0.0; + zsum = 0.0; + +// #pragma omp parallel for default(shared) private(b, bsize, jl, jk) \ +// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + + for (b = 0; b < nblocks; b++) { + bsize = min(nlon, ngptot - b*nlon); // field block size + for (jl = 0; jl < nlev; jl++) { + for (jk = 0; jk < bsize; jk++) { + zminval = fmin(zminval, v_field[b*nlev*nlon+jl*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nlev*nlon+jl*nlon+jk]); + + // Difference against reference result in one-norm sense + zdiff = fabs(v_field[b*nlev*nlon+jl*nlon+jk] - v_ref[b*nlev*nlon+jl*nlon+jk]); + zmaxerr = fmax(zmaxerr, zdiff); + zerrsum = zerrsum + zdiff; + zsum = zsum + abs(v_ref[b*nlev*nlon+jl*nlon+jk]); + } + } + } + + zavgpgp = zerrsum / (double) ngptot; + print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); +} + + +void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, + int nlev, int nclv, int ngptot, int nblocks) +{ + /* Computes and prints errors in the "L2 norm sense" */ + int b, bsize, jl, jk, jm; + double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; +// double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field; +// double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref; + + zminval = +DBL_MAX; + zmaxval = -DBL_MAX; + zmaxerr = 0.0; + zerrsum = 0.0; + zsum = 0.0; + +// #pragma omp parallel for default(shared) private(b, bsize, jl, jk, jm) \ +// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + for (b = 0; b < nblocks; b++) { + bsize = min(nlon, ngptot - b*nlon); // field block size + for (jm = 0; jm < nclv; jm++) { + for (jl = 0; jl < nlev; jl++) { + for (jk = 0; jk < bsize; jk++) { + zminval = fmin(zminval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + + // Difference against reference result in one-norm sense + zdiff = fabs(v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk] - v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + zmaxerr = fmax(zmaxerr, zdiff); + zerrsum = zerrsum + zdiff; + zsum = zsum + abs(v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + } + } + } + } + zavgpgp = zerrsum / (double) ngptot; + print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); +} + + +int cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) +{ + const int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + double *ref_plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + double *ref_pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nclv*nlev*nproma ); + + load_reference(nlon, nlev, nclv, ngptot, nproma, + ref_plude, ref_pcovptot, ref_prainfrac_toprfz, ref_pfsqlf, ref_pfsqif, + ref_pfcqlng, ref_pfcqnng, ref_pfsqrf, ref_pfsqsf, ref_pfcqrng, ref_pfcqsng, + ref_pfsqltur, ref_pfsqitur, ref_pfplsl, ref_pfplsn, ref_pfhpsl, ref_pfhpsn, + ref_tend_loc_a, ref_tend_loc_q, ref_tend_loc_t, ref_tend_loc_cld); + + + printf(" %+20s %s %+20s %+20s %+20s %+20s %+20s\n", + "Variable", "Dim", "MinValue", "MaxValue", "AbsMaxErr", "AvgAbsErr/GP", "MaxRelErr-%"); + + validate_2d("PLUDE", ref_plude, plude, nproma, nlev, ngptot, nblocks); + validate_2d("PCOVPTOT", ref_pcovptot, pcovptot, nproma, nlev, ngptot, nblocks); + validate_1d("PRAINFRAC_TOPRFZ", ref_prainfrac_toprfz, prainfrac_toprfz, nproma, ngptot, nblocks); + validate_2d("PFSQLF", ref_pfsqlf, pfsqlf, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQIF", ref_pfsqif, pfsqif, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQLNG", ref_pfcqlng, pfcqlng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQNNG", ref_pfcqnng, pfcqnng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQRF", ref_pfsqrf, pfsqrf, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQSF", ref_pfsqsf, pfsqsf, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQRNG", ref_pfcqrng, pfcqrng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQSNG", ref_pfcqsng, pfcqsng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQLTUR", ref_pfsqltur, pfsqltur, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQITUR", ref_pfsqitur, pfsqitur, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFPLSL", ref_pfplsl, pfplsl, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFPLSN", ref_pfplsn, pfplsn, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFHPSL", ref_pfhpsl, pfhpsl, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFHPSN", ref_pfhpsn, pfhpsn, nproma, nlev+1, ngptot, nblocks); + validate_2d("TENDENCY_LOC%A", ref_tend_loc_a, tend_loc_a, nproma, nlev, ngptot, nblocks); + validate_2d("TENDENCY_LOC%Q", ref_tend_loc_q, tend_loc_q, nproma, nlev, ngptot, nblocks); + validate_2d("TENDENCY_LOC%T", ref_tend_loc_t, tend_loc_t, nproma, nlev, ngptot, nblocks); + validate_3d("TENDENCY_LOC%CLD", ref_tend_loc_cld, tend_loc_cld, nproma, nlev, nclv, ngptot, nblocks); + + free(ref_plude); + free(ref_pcovptot); + free(ref_prainfrac_toprfz); + free(ref_pfsqlf); + free(ref_pfsqif); + free(ref_pfcqlng); + free(ref_pfcqnng); + free(ref_pfsqrf); + free(ref_pfsqsf); + free(ref_pfcqrng); + free(ref_pfcqsng); + free(ref_pfsqltur); + free(ref_pfsqitur); + free(ref_pfplsl); + free(ref_pfplsn); + free(ref_pfhpsl); + free(ref_pfhpsn); + free(ref_tend_loc_a); + free(ref_tend_loc_q); + free(ref_tend_loc_t); + free(ref_tend_loc_cld); + + return 0; + +} diff --git a/src/cloudsc_hip/cloudsc/cloudsc_validate.h b/src/cloudsc_hip/cloudsc/cloudsc_validate.h new file mode 100644 index 00000000..155b4274 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/cloudsc_validate.h @@ -0,0 +1,17 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "load_state.h" + +int cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld); diff --git a/src/cloudsc_hip/cloudsc/load_state.cpp b/src/cloudsc_hip/cloudsc/load_state.cpp new file mode 100644 index 00000000..38fc148f --- /dev/null +++ b/src/cloudsc_hip/cloudsc/load_state.cpp @@ -0,0 +1,447 @@ +#include "load_state.h" +//#include "yomcst_c.hpp" +#include + +#include +#include "serialbox-c/Serialbox.h" + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +/* Query sizes and dimensions of state arrays */ +void query_state(int *klon, int *klev) +{ + serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); + serialboxMetainfo_t* globalMetainfo = serialboxSerializerGetGlobalMetainfo(serializer); + + *klon = serialboxMetainfoGetInt32(globalMetainfo, "KLON"); + *klev = serialboxMetainfoGetInt32(globalMetainfo, "KLEV"); + + serialboxMetainfoDestroy(globalMetainfo); + serialboxSerializerDestroy(serializer); +} + +void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) +{ + int gidx, bsize, bidx, bend, fsize, b, l, i; + + #pragma omp parallel for default(shared) private(gidx, bsize, bidx, bend, fsize, b, l, i) + for (b = 0; b < nblocks; b++) { + gidx = b*nproma; // Global starting index of the block in the general domain + bsize = min(nproma, ngptot - gidx); // Size of the field block + bidx = gidx % nlon; // Rolling index into the input buffer + bend = min(bidx+bsize, nlon); // Idealised final index in the input buffer + + if (bend-bidx < bsize) { + // The input buffer does not hold enough data to fill field block; + // we need to fill the rest of the block with data from front of buffer. + fsize = nlon - bidx; + for (i = 0; i < fsize; i++) { field_in[b*nproma+i] = buffer[bidx + i]; } + for (i = 0; i < bsize-fsize; i++) { field_in[b*nproma+fsize+i] = buffer[i]; } + } else { + // Simply copy a block of data from the rolling buffer index + for (i = 0; i < bsize; i++) { field_in[b*nproma+i] = buffer[bidx+i]; } + } + // Zero out the remainder of last block + for (i=bsize; iramid = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RAMID"); + yrecldp->rcldiff = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDIFF"); + yrecldp->rcldiff_convi = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDIFF_CONVI"); + yrecldp->rclcrit = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLCRIT"); + yrecldp->rclcrit_sea = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLCRIT_SEA"); + yrecldp->rclcrit_land = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLCRIT_LAND"); + yrecldp->rkconv = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RKCONV"); + yrecldp->rprc1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPRC1"); + yrecldp->rprc2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPRC2"); + yrecldp->rcldmax = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDMAX"); + yrecldp->rpecons = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPECONS"); + yrecldp->rvrfactor = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVRFACTOR"); + yrecldp->rprecrhmax = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPRECRHMAX"); + yrecldp->rtaumel = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RTAUMEL"); + yrecldp->ramin = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RAMIN"); + yrecldp->rlmin = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RLMIN"); + yrecldp->rkooptau = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RKOOPTAU"); + + yrecldp->rcldtopp = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDTOPP"); + yrecldp->rlcritsnow = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RLCRITSNOW"); + yrecldp->rsnowlin1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RSNOWLIN1"); + yrecldp->rsnowlin2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RSNOWLIN2"); + yrecldp->ricehi1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RICEHI1"); + yrecldp->ricehi2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RICEHI2"); + yrecldp->riceinit = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RICEINIT"); + yrecldp->rvice = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVICE"); + yrecldp->rvrain = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVRAIN"); + yrecldp->rvsnow = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVSNOW"); + yrecldp->rthomo = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RTHOMO"); + yrecldp->rcovpmin = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCOVPMIN"); + yrecldp->rccn = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCN"); + yrecldp->rnice = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RNICE"); + yrecldp->rccnom = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCNOM"); + yrecldp->rccnss = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCNSS"); + yrecldp->rccnsu = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCNSU"); + yrecldp->rcldtopcf = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDTOPCF"); + yrecldp->rdepliqrefrate = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDEPLIQREFRATE"); + yrecldp->rdepliqrefdepth = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDEPLIQREFDEPTH"); + yrecldp->rcl_kkaac = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKAac"); + yrecldp->rcl_kkbac = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKBac"); + yrecldp->rcl_kkaau = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKAau"); + yrecldp->rcl_kkbauq = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKBauq"); + yrecldp->rcl_kkbaun = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKBaun"); + yrecldp->rcl_kk_cloud_num_sea = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KK_cloud_num_sea"); + yrecldp->rcl_kk_cloud_num_land = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KK_cloud_num_land"); + yrecldp->rcl_ai = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_AI"); + yrecldp->rcl_bi = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_BI"); + yrecldp->rcl_ci = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CI"); + yrecldp->rcl_di = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DI"); + yrecldp->rcl_x1i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X1I"); + yrecldp->rcl_x2i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X2I"); + yrecldp->rcl_x3i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X3I"); + yrecldp->rcl_x4i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X4I"); + yrecldp->rcl_const1i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST1I"); + yrecldp->rcl_const2i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST2I"); + yrecldp->rcl_const3i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST3I"); + yrecldp->rcl_const4i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST4I"); + yrecldp->rcl_const5i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST5I"); + yrecldp->rcl_const6i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST6I"); + yrecldp->rcl_apb1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_APB1"); + yrecldp->rcl_apb2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_APB2"); + yrecldp->rcl_apb3 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_APB3"); + yrecldp->rcl_as = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_AS"); + yrecldp->rcl_bs = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_BS"); + yrecldp->rcl_cs = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CS"); + yrecldp->rcl_ds = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DS"); + yrecldp->rcl_x1s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X1S"); + yrecldp->rcl_x2s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X2S"); + yrecldp->rcl_x3s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X3S"); + yrecldp->rcl_x4s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X4S"); + yrecldp->rcl_const1s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST1S"); + yrecldp->rcl_const2s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST2S"); + yrecldp->rcl_const3s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST3S"); + yrecldp->rcl_const4s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST4S"); + yrecldp->rcl_const5s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST5S"); + yrecldp->rcl_const6s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST6S"); + yrecldp->rcl_const7s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST7S"); + yrecldp->rcl_const8s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST8S"); + yrecldp->rdenswat = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDENSWAT"); + yrecldp->rdensref = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDENSREF"); + yrecldp->rcl_ar = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_AR"); + yrecldp->rcl_br = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_BR"); + yrecldp->rcl_cr = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CR"); + yrecldp->rcl_dr = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DR"); + yrecldp->rcl_x1r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X1R"); + yrecldp->rcl_x2r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X2R"); + yrecldp->rcl_x4r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X4R"); + yrecldp->rcl_ka273 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KA273"); + yrecldp->rcl_cdenom1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CDENOM1"); + yrecldp->rcl_cdenom2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CDENOM2"); + yrecldp->rcl_cdenom3 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CDENOM3"); + yrecldp->rcl_schmidt = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_SCHMIDT"); + yrecldp->rcl_dynvisc = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DYNVISC"); + yrecldp->rcl_const1r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST1R"); + yrecldp->rcl_const2r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST2R"); + yrecldp->rcl_const3r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST3R"); + yrecldp->rcl_const4r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST4R"); + yrecldp->rcl_fac1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FAC1"); + yrecldp->rcl_fac2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FAC2"); + yrecldp->rcl_const5r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST5R"); + yrecldp->rcl_const6r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST6R"); + yrecldp->rcl_fzrab = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FZRAB"); + yrecldp->rcl_fzrbb = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FZRBB"); + yrecldp->lcldextra = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LCLDEXTRA"); + yrecldp->lcldbudget = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LCLDBUDGET"); + yrecldp->nssopt = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NSSOPT"); + yrecldp->ncldtop = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NCLDTOP"); + yrecldp->naeclbc = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLBC"); + yrecldp->naecldu = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLDU"); + yrecldp->naeclom = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLOM"); + yrecldp->naeclss = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLSS"); + yrecldp->naeclsu = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLSU"); + yrecldp->nclddiag = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NCLDDIAG"); + yrecldp->naercld = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAERCLD"); + yrecldp->laerliqautolsp = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQAUTOLSP"); + yrecldp->laerliqautocp = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQAUTOCP"); + yrecldp->laerliqautocpb = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQAUTOCPB"); + yrecldp->laerliqcoll = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQCOLL"); + yrecldp->laericesed = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERICESED"); + yrecldp->laericeauto = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERICEAUTO"); + yrecldp->nshapep = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NSHAPEP"); + yrecldp->nshapeq = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NSHAPEQ"); + yrecldp->nbeta = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NBETA"); + + serialboxSerializerDestroySavepointVector(savepoints, 1); + serialboxSerializerDestroy(serializer); +} + +/* Read reference result into memory */ +void load_reference(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) +{ + serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "reference", "Binary"); + serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); + serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); + serialboxSavepoint_t* savepoint = savepoints[0]; + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + + load_and_expand_2d(serializer, savepoint, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(serializer, savepoint, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); + load_and_expand_1d(serializer, savepoint, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); + load_and_expand_2d(serializer, savepoint, "PFSQLF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqlf); + load_and_expand_2d(serializer, savepoint, "PFSQIF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqif); + load_and_expand_2d(serializer, savepoint, "PFCQLNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqlng); + load_and_expand_2d(serializer, savepoint, "PFCQNNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqnng); + load_and_expand_2d(serializer, savepoint, "PFSQRF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqrf); + load_and_expand_2d(serializer, savepoint, "PFSQSF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqsf); + load_and_expand_2d(serializer, savepoint, "PFCQRNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqrng); + load_and_expand_2d(serializer, savepoint, "PFCQSNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqsng); + load_and_expand_2d(serializer, savepoint, "PFSQLTUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqltur); + load_and_expand_2d(serializer, savepoint, "PFSQITUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqitur); + load_and_expand_2d(serializer, savepoint, "PFPLSL", nlon, nlev+1, nproma, ngptot, nblocks, pfplsl); + load_and_expand_2d(serializer, savepoint, "PFPLSN", nlon, nlev+1, nproma, ngptot, nblocks, pfplsn); + load_and_expand_2d(serializer, savepoint, "PFHPSL", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsl); + load_and_expand_2d(serializer, savepoint, "PFHPSN", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsn); + load_and_expand_2d(serializer, savepoint, "TENDENCY_LOC_T", nlon, nlev, nproma, ngptot, nblocks, tend_loc_t); + load_and_expand_2d(serializer, savepoint, "TENDENCY_LOC_Q", nlon, nlev, nproma, ngptot, nblocks, tend_loc_q); + load_and_expand_2d(serializer, savepoint, "TENDENCY_LOC_A", nlon, nlev, nproma, ngptot, nblocks, tend_loc_a); + load_and_expand_3d(serializer, savepoint, "TENDENCY_LOC_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_loc_cld); + + serialboxSerializerDestroySavepointVector(savepoints, 1); + serialboxSerializerDestroy(serializer); +} diff --git a/src/cloudsc_hip/cloudsc/load_state.h b/src/cloudsc_hip/cloudsc/load_state.h new file mode 100644 index 00000000..65fbf8c2 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/load_state.h @@ -0,0 +1,40 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include "yoecldp_c.h" + +struct TECLDP ; + +void query_state(int *klon, int *klev); + +void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double* ptsphy, double* plcrit_aer, double* picrit_aer, + double* pre_ice, double* pccn, double* pnice, double* pt, double* pq, + double* tend_cml_t, double* tend_cml_q, double* tend_cml_a, double* tend_cml_cld, + double* tend_tmp_t, double* tend_tmp_q, double* tend_tmp_a, double* tend_tmp_cld, + double* pvfa, double* pvfl, double* pvfi, double* pdyna, double* pdynl, double* pdyni, + double* phrsw, double* phrlw, double* pvervel, double* pap, double* paph, double* plsm, + int* ktype, double* plu, double* plude, double* psnde, double* pmfu, + double* pmfd, double* pa, double* pclv, double* psupsat, struct TECLDP* yrecldp, + double* rg, double* rd, double* rcpd, double* retv, double* rlvtt, double* rlstt, + double* rlmlt, double* rtt, double* rv, double* r2es, double* r3les, double* r3ies, + double* r4les, double* r4ies, double* r5les, double* r5ies, double* r5alvcp, double* r5alscp, + double* ralvdcp, double* ralsdcp, double* ralfdcp, double* rtwat, + double* rtice, double* rticecu, double* rtwat_rtice_r, double *rtwat_rticecu_r, + double* rkoop1, double* rkoop2); + + +void load_reference(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld); diff --git a/src/cloudsc_hip/cloudsc/mycpu.cpp b/src/cloudsc_hip/cloudsc/mycpu.cpp new file mode 100644 index 00000000..8c6e8506 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/mycpu.cpp @@ -0,0 +1,31 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#if defined(__APPLE__) +static int sched_getcpu() { return 0; } +#else +#include +#endif + +/* + * Find the core the thread belongs to + */ + +int mycpu_ () +{ + /* int sched_getcpu(void); */ + int cpu; +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wimplicit-function-declaration" + cpu = sched_getcpu(); +#pragma clang diagnostic pop + return cpu; +} +int mycpu() { return mycpu_(); } diff --git a/src/cloudsc_hip/cloudsc/mycpu.h b/src/cloudsc_hip/cloudsc/mycpu.h new file mode 100644 index 00000000..6b26848e --- /dev/null +++ b/src/cloudsc_hip/cloudsc/mycpu.h @@ -0,0 +1,11 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +int mycpu (); diff --git a/src/cloudsc_hip/cloudsc/yoecldp_c.h b/src/cloudsc_hip/cloudsc/yoecldp_c.h new file mode 100644 index 00000000..7fcace99 --- /dev/null +++ b/src/cloudsc_hip/cloudsc/yoecldp_c.h @@ -0,0 +1,145 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#ifndef YOECLDP_H +#define YOECLDP_H + +//int nclv; // number of microphysics variables +//int ncldql; // liquid cloud water +//int ncldqi; // ice cloud water +//int ncldqr; // rain water +//int ncldqs; // snow +//int ncldqv; // vapour + +struct TECLDP { + double ramid; + double rcldiff; + double rcldiff_convi; + double rclcrit; + double rclcrit_sea; + double rclcrit_land; + double rkconv; + double rprc1; + double rprc2; + double rcldmax; + double rpecons; + double rvrfactor; + double rprecrhmax; + double rtaumel; + double ramin; + double rlmin; + double rkooptau; + double rcldtopp; + double rlcritsnow; + double rsnowlin1; + double rsnowlin2; + double ricehi1; + double ricehi2; + double riceinit; + double rvice; + double rvrain; + double rvsnow; + double rthomo; + double rcovpmin; + double rccn; + double rnice; + double rccnom; + double rccnss; + double rccnsu; + double rcldtopcf; + double rdepliqrefrate; + double rdepliqrefdepth; + double rcl_kkaac; + double rcl_kkbac; + double rcl_kkaau; + double rcl_kkbauq; + double rcl_kkbaun; + double rcl_kk_cloud_num_sea; + double rcl_kk_cloud_num_land; + double rcl_ai; + double rcl_bi; + double rcl_ci; + double rcl_di; + double rcl_x1i; + double rcl_x2i; + double rcl_x3i; + double rcl_x4i; + double rcl_const1i; + double rcl_const2i; + double rcl_const3i; + double rcl_const4i; + double rcl_const5i; + double rcl_const6i; + double rcl_apb1; + double rcl_apb2; + double rcl_apb3; + double rcl_as; + double rcl_bs; + double rcl_cs; + double rcl_ds; + double rcl_x1s; + double rcl_x2s; + double rcl_x3s; + double rcl_x4s; + double rcl_const1s; + double rcl_const2s; + double rcl_const3s; + double rcl_const4s; + double rcl_const5s; + double rcl_const6s; + double rcl_const7s; + double rcl_const8s; + double rdenswat; + double rdensref; + double rcl_ar; + double rcl_br; + double rcl_cr; + double rcl_dr; + double rcl_x1r; + double rcl_x2r; + double rcl_x4r; + double rcl_ka273; + double rcl_cdenom1; + double rcl_cdenom2; + double rcl_cdenom3; + double rcl_schmidt; + double rcl_dynvisc; + double rcl_const1r; + double rcl_const2r; + double rcl_const3r; + double rcl_const4r; + double rcl_fac1; + double rcl_fac2; + double rcl_const5r; + double rcl_const6r; + double rcl_fzrab; + double rcl_fzrbb; + int lcldextra, lcldbudget; + int nssopt; + int ncldtop; + int naeclbc, naecldu, naeclom, naeclss, naeclsu; + int nclddiag; + int naercld; + int laerliqautolsp; + int laerliqautocp; + int laerliqautocpb; + int laerliqcoll; + int laericesed; + int laericeauto; + double nshapep; + double nshapeq; + int nbeta; + //double rbeta[0][100]; + //double rbetap1[0][100]; +} ; + +//struct TECLDP *yrecldp; + +#endif diff --git a/src/cloudsc_hip/dwarf_cloudsc.cpp b/src/cloudsc_hip/dwarf_cloudsc.cpp new file mode 100644 index 00000000..43b91b84 --- /dev/null +++ b/src/cloudsc_hip/dwarf_cloudsc.cpp @@ -0,0 +1,44 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "cloudsc_driver.h" + + +int main( int argc, char *argv[] ) { + + int omp_threads, ngptot, nproma; + int return_code; + + return_code = 0; + + // default values + omp_threads = 1; + ngptot = 100; + nproma = 4; + + if (argc == 1) { + cloudsc_driver(omp_threads, ngptot, nproma); + } + else if (argc == 4) { + omp_threads = atoi( argv[1] ); + ngptot = atoi( argv[2] ); + nproma = atoi( argv[3] ); + cloudsc_driver(omp_threads, ngptot, nproma); + } + else { + printf("Calling c-cloudsc with the right number of arguments will work better ;-) \n",argc); + return_code = EXIT_FAILURE; + } + + return return_code; +} From 7c397fe4cbf60ec5660d31c2dd7f58387024f15c Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Mon, 4 Sep 2023 16:52:55 +0200 Subject: [PATCH 073/174] Change in load state in order to allow correct validation for nproma = 128 --- src/cloudsc_hip/cloudsc/load_state.cpp | 122 +++++++------------------ 1 file changed, 34 insertions(+), 88 deletions(-) diff --git a/src/cloudsc_hip/cloudsc/load_state.cpp b/src/cloudsc_hip/cloudsc/load_state.cpp index 38fc148f..d8e7e03d 100644 --- a/src/cloudsc_hip/cloudsc/load_state.cpp +++ b/src/cloudsc_hip/cloudsc/load_state.cpp @@ -23,124 +23,70 @@ void query_state(int *klon, int *klev) void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) { - int gidx, bsize, bidx, bend, fsize, b, l, i; + int b, l, i, buf_start_idx, buf_idx; - #pragma omp parallel for default(shared) private(gidx, bsize, bidx, bend, fsize, b, l, i) +#pragma omp parallel for default(shared) private(b, l, i, buf_start_idx, buf_idx) for (b = 0; b < nblocks; b++) { - gidx = b*nproma; // Global starting index of the block in the general domain - bsize = min(nproma, ngptot - gidx); // Size of the field block - bidx = gidx % nlon; // Rolling index into the input buffer - bend = min(bidx+bsize, nlon); // Idealised final index in the input buffer - - if (bend-bidx < bsize) { - // The input buffer does not hold enough data to fill field block; - // we need to fill the rest of the block with data from front of buffer. - fsize = nlon - bidx; - for (i = 0; i < fsize; i++) { field_in[b*nproma+i] = buffer[bidx + i]; } - for (i = 0; i < bsize-fsize; i++) { field_in[b*nproma+fsize+i] = buffer[i]; } - } else { - // Simply copy a block of data from the rolling buffer index - for (i = 0; i < bsize; i++) { field_in[b*nproma+i] = buffer[bidx+i]; } + buf_start_idx = ((b)*nproma) % nlon; + for (i = 0; i < nproma; i++) { + buf_idx = (buf_start_idx + i) % nlon; + // field[b][i] = buffer[buf_idx]; + field_in[b*nproma+i] = buffer[buf_idx]; } - // Zero out the remainder of last block - for (i=bsize; i Date: Tue, 12 Sep 2023 16:37:24 +0200 Subject: [PATCH 074/174] using two MultiField-s to group variables under one block; still relying on FieldSet for other datatype/dimension/level_number variables; using Field lookup by name --- .../cloudsc_global_atlas_state_mod.F90 | 191 +++++++++++------- .../dwarf_cloudsc_atlas.F90 | 13 +- .../expand_atlas_mod.F90 | 7 +- .../validate_atlas_mod.F90 | 26 +-- 4 files changed, 138 insertions(+), 99 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 5ccb174e..c4e21af2 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -24,6 +24,7 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD USE ATLAS_MODULE USE, INTRINSIC :: ISO_C_BINDING USE ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS_MODULE + USE ATLAS_MULTIFIELD_MODULE IMPLICIT NONE @@ -135,104 +136,124 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) - CALL FSET%DATA(1, SELF%PLCRIT_AER, IBLK) - CALL FSET%DATA(2, SELF%PICRIT_AER, IBLK) - CALL FSET%DATA(3, SELF%PRE_ICE, IBLK) - CALL FSET%DATA(4, SELF%PCCN, IBLK) - CALL FSET%DATA(5, SELF%PNICE, IBLK) - CALL FSET%DATA(6, SELF%PT, IBLK) - CALL FSET%DATA(7, SELF%PQ, IBLK) - CALL FSET%DATA(8, SELF%PVFA, IBLK) - CALL FSET%DATA(9, SELF%PVFL, IBLK) - CALL FSET%DATA(10, SELF%PVFI, IBLK) - CALL FSET%DATA(11, SELF%PDYNA, IBLK) - CALL FSET%DATA(12, SELF%PDYNL, IBLK) - CALL FSET%DATA(13, SELF%PDYNI, IBLK) - CALL FSET%DATA(14, SELF%PHRSW, IBLK) - CALL FSET%DATA(15, SELF%PHRLW, IBLK) - CALL FSET%DATA(16, SELF%PVERVEL, IBLK) - CALL FSET%DATA(17, SELF%PAP, IBLK) - CALL FSET%DATA(18, SELF%PLU, IBLK) - CALL FSET%DATA(19, SELF%PLUDE, IBLK) - CALL FSET%DATA(20, SELF%PSNDE, IBLK) - CALL FSET%DATA(21, SELF%PMFU, IBLK) - CALL FSET%DATA(22, SELF%PMFD, IBLK) - CALL FSET%DATA(23, SELF%PA, IBLK) - CALL FSET%DATA(24, SELF%PSUPSAT, IBLK) - CALL FSET%DATA(25, SELF%PLSM, IBLK) - CALL FSET%DATA(26, SELF%LDCUM, IBLK) - CALL FSET%DATA(27, SELF%KTYPE, IBLK) - CALL FSET%DATA(28, SELF%PAPH, IBLK) - CALL FSET%DATA(29, SELF%PEXTRA, IBLK) - CALL FSET%DATA(30, SELF%PCLV, IBLK) - - CALL FSET%DATA(31, TMP3D, IBLK) +! CALL FSET%UPDATE_DEVICE(1,IBLK) +! field = fset%field(1) +! call field%update_device() + + CALL FSET%DATA("PLCRIT_AER", SELF%PLCRIT_AER, IBLK) + CALL FSET%DATA("PICRIT_AER", SELF%PICRIT_AER, IBLK) + CALL FSET%DATA("PRE_ICE", SELF%PRE_ICE, IBLK) + CALL FSET%DATA("PCCN", SELF%PCCN, IBLK) + CALL FSET%DATA("PNICE", SELF%PNICE, IBLK) + CALL FSET%DATA("PT", SELF%PT, IBLK) + CALL FSET%DATA("PQ", SELF%PQ, IBLK) + CALL FSET%DATA("PVFA", SELF%PVFA, IBLK) + CALL FSET%DATA("PVFL", SELF%PVFL, IBLK) + CALL FSET%DATA("PVFI", SELF%PVFI, IBLK) + CALL FSET%DATA("PDYNA", SELF%PDYNA, IBLK) + CALL FSET%DATA("PDYNL", SELF%PDYNL, IBLK) + CALL FSET%DATA("PDYNI", SELF%PDYNI, IBLK) + CALL FSET%DATA("PHRSW", SELF%PHRSW, IBLK) + CALL FSET%DATA("PHRLW", SELF%PHRLW, IBLK) + CALL FSET%DATA("PVERVEL", SELF%PVERVEL, IBLK) + CALL FSET%DATA("PAP", SELF%PAP, IBLK) + CALL FSET%DATA("PLU", SELF%PLU, IBLK) + CALL FSET%DATA("PLUDE", SELF%PLUDE, IBLK) + CALL FSET%DATA("PSNDE", SELF%PSNDE, IBLK) + CALL FSET%DATA("PMFU", SELF%PMFU, IBLK) + CALL FSET%DATA("PMFD", SELF%PMFD, IBLK) + CALL FSET%DATA("PA", SELF%PA, IBLK) + CALL FSET%DATA("PSUPSAT", SELF%PSUPSAT, IBLK) + CALL FSET%DATA("PLSM", SELF%PLSM, IBLK) + CALL FSET%DATA("LDCUM", SELF%LDCUM, IBLK) + CALL FSET%DATA("KTYPE", SELF%KTYPE, IBLK) + CALL FSET%DATA("PAPH", SELF%PAPH, IBLK) + CALL FSET%DATA("PEXTRA", SELF%PEXTRA, IBLK) + CALL FSET%DATA("PCLV", SELF%PCLV, IBLK) + + CALL FSET%DATA("TENDENCY_CML", TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(32, TMP3D, IBLK) + CALL FSET%DATA("TENDENCY_TMP", TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(33, TMP3D, IBLK) + CALL FSET%DATA("TENDENCY_LOC", TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(34, SELF%PFSQLF, IBLK) - CALL FSET%DATA(35, SELF%PFSQIF, IBLK) - CALL FSET%DATA(36, SELF%PFCQLNG, IBLK) - CALL FSET%DATA(37, SELF%PFCQNNG, IBLK) - CALL FSET%DATA(38, SELF%PFSQRF, IBLK) - CALL FSET%DATA(39, SELF%PFSQSF, IBLK) - CALL FSET%DATA(40, SELF%PFCQRNG, IBLK) - CALL FSET%DATA(41, SELF%PFCQSNG, IBLK) - CALL FSET%DATA(42, SELF%PFSQLTUR, IBLK) - CALL FSET%DATA(43, SELF%PFSQITUR, IBLK) - CALL FSET%DATA(44, SELF%PFPLSL, IBLK) - CALL FSET%DATA(45, SELF%PFPLSN, IBLK) - CALL FSET%DATA(46, SELF%PFHPSL, IBLK) - CALL FSET%DATA(47, SELF%PFHPSN, IBLK) - CALL FSET%DATA(48, SELF%PCOVPTOT, IBLK) - CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FSET%DATA("PFSQLF", SELF%PFSQLF, IBLK) + CALL FSET%DATA("PFSQIF", SELF%PFSQIF, IBLK) + CALL FSET%DATA("PFCQLNG", SELF%PFCQLNG, IBLK) + CALL FSET%DATA("PFCQNNG", SELF%PFCQNNG, IBLK) + CALL FSET%DATA("PFSQRF", SELF%PFSQRF, IBLK) + CALL FSET%DATA("PFSQSF", SELF%PFSQSF, IBLK) + CALL FSET%DATA("PFCQRNG", SELF%PFCQRNG, IBLK) + CALL FSET%DATA("PFCQSNG", SELF%PFCQSNG, IBLK) + CALL FSET%DATA("PFSQLTUR", SELF%PFSQLTUR, IBLK) + CALL FSET%DATA("PFSQITUR", SELF%PFSQITUR, IBLK) + CALL FSET%DATA("PFPLSL", SELF%PFPLSL, IBLK) + CALL FSET%DATA("PFPLSN", SELF%PFPLSN, IBLK) + CALL FSET%DATA("PFHPSL", SELF%PFHPSL, IBLK) + CALL FSET%DATA("PFHPSN", SELF%PFHPSN, IBLK) + CALL FSET%DATA("PCOVPTOT", SELF%PCOVPTOT, IBLK) + CALL FSET%DATA("PRAINFRAC_TOPRFZ", SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD_OUT, FSPACE, NPROMA, NGPTOTG) ! Load reference input data via serialbox CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET - INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS), INTENT(INOUT) :: FSPACE + TYPE(ATLAS_MULTIFIELD), INTENT(INOUT) :: MULTIFIELD_IN, MULTIFIELD_OUT + INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NGPTOTG TYPE(ATLAS_STRUCTUREDGRID) :: GRID - TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE INTEGER(KIND=JPIM) :: KLON, IVAR, B TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) REAL(C_DOUBLE), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD - TYPE(ATLAS_PARTITIONER) :: PARTITIONER - INTEGER(KIND=JPIM) :: NGPTOT + TYPE(ATLAS_CONFIG) :: CONFIG + TYPE(ATLAS_CONFIG), DIMENSION(24) :: IN_MFIELD_CONFIG + TYPE(ATLAS_CONFIG), DIMENSION(14) :: OUT_MFIELD_CONFIG + INTEGER :: INVAR_SIZE, OUTVAR_SIZE + CALL INPUT_INITIALIZE(NAME='input') CALL LOAD_SCALAR('KLON', KLON) CALL LOAD_SCALAR('KLEV', SELF%KLEV) CALL LOAD_SCALAR('KFLDX', SELF%KFLDX) - GRID = ATLAS_REGULARLONLATGRID(NGPTOTG,1) - PARTITIONER = ATLAS_PARTITIONER("bands") - FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, PARTITIONER, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) - NGPTOT = FSPACE%SIZE() + GRID = ATLAS_REGULARLONLATGRID(NGPTOTG, 1) + FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) SELF%NBLOCKS = FSPACE%NBLKS() - FSET = ATLAS_FIELDSET() - DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 ! last six variables are special - CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(IN_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) - ENDDO + ! create multifield + CONFIG = atlas_CONFIG() + CALL CONFIG%SET("type", "MultiFieldCreatorIFS") + CALL CONFIG%SET("nproma", NPROMA) + CALL CONFIG%SET("ngptot", FSPACE%SIZE()) + CALL CONFIG%SET("datatype", "real64") + + ! input multifield on model levels, i.e. LEVELS = FSPACE%LEVELS() = SELF%KLEV + INVAR_SIZE = SIZE(IN_VAR_NAMES) - 6 ! the last six variables are special and added through FieldSet + DO IVAR = 1, INVAR_SIZE + IN_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() + CALL IN_MFIELD_CONFIG(IVAR)%SET("name", TRIM(IN_VAR_NAMES(IVAR))) + END DO + CALL CONFIG%SET("nlev", FSPACE%LEVELS()) + CALL CONFIG%SET("fields", IN_MFIELD_CONFIG) + MULTIFIELD_IN = atlas_MultiField(CONFIG) + CALL FSET%ADD(MULTIFIELD_IN%FIELDSET()) + + ! special six input variables CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PLSM", KIND=ATLAS_REAL(JPRB), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="LDCUM", KIND=ATLAS_LOGICAL(), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="KTYPE", KIND=ATLAS_INTEGER(JPIM), LEVELS=0)) @@ -240,8 +261,25 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,SELF%KFLDX))) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCLV", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,NCLV))) + ! output multifield on model interfaces, i.e. LEVELS = FSPACE%LEVELS() + 1 = SELF%KLEV + 1 + OUTVAR_SIZE = SIZE(OUT_VAR_NAMES) - 2 ! the last two variables are special and added through FieldSet + DO IVAR = 1, OUTVAR_SIZE + OUT_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() + CALL OUT_MFIELD_CONFIG(IVAR)%SET("name", TRIM(OUT_VAR_NAMES(IVAR))) + CALL OUT_MFIELD_CONFIG(IVAR)%SET("levels", SELF%KLEV+1) + END DO + CALL CONFIG%SET("nlev", 1 + FSPACE%LEVELS()) + CALL CONFIG%SET("fields", OUT_MFIELD_CONFIG) + MULTIFIELD_OUT = atlas_MultiField(CONFIG) + CALL FSET%ADD(MULTIFIELD_OUT%FIELDSET()) + + !FSET = ATLAS_FIELDSET() + !DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 ! last six variables are special + ! CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(IN_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) + !ENDDO + DO IVAR = 1, SIZE(IN_VAR_NAMES) - CALL LOADVAR_ATLAS(FSET, TRIM(IN_VAR_NAMES(IVAR)), KLON, NGPTOTG) + CALL LOADVAR_ATLAS(FSET, FSPACE, TRIM(IN_VAR_NAMES(IVAR)), KLON, NGPTOTG) ENDDO FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_CML', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) @@ -255,10 +293,12 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) ! unrolled at every step, and we rely on dirty hackery to do this. CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_CML', KLON, NGPTOTG) CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_TMP', KLON, NGPTOTG) + ! Output fields are simply allocated and zero'd - DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 2 - CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) - ENDDO + ! + !DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 2 + ! CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) + !ENDDO CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCOVPTOT", KIND=ATLAS_REAL(JPRB))) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PRAINFRAC_TOPRFZ", KIND=ATLAS_REAL(JPRB), LEVELS=0)) @@ -271,10 +311,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) END DO !$omp end parallel do ENDDO - ! DEBUG - !FIELD = FSET%FIELD("PAP") - !call field%data(tmp3d) - !print *, MINVAL(tmp3d), MAXVAL(tmp3d) FIELD = FSET%FIELD("PRAINFRAC_TOPRFZ") CALL FIELD%DATA(TMP2D) @@ -296,11 +332,12 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) CALL INPUT_FINALIZE() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) ! Validate the correctness of output against reference data CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET - INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS), INTENT(IN) :: FSPACE + INTEGER(KIND=JPIM), INTENT(IN) :: NGPTOTG INTEGER(KIND=JPIM) :: KLON, IVAR @@ -317,11 +354,11 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOTG) ! Actual variable validation - CALL VALIDATEVAR_ATLAS(FSET, 'PLUDE', KLON, NGPTOTG) + CALL VALIDATEVAR_ATLAS(FSET, FSPACE, 'PLUDE', KLON, NGPTOTG) DO IVAR = 1, SIZE(OUT_VAR_NAMES) - CALL VALIDATEVAR_ATLAS(FSET, OUT_VAR_NAMES(IVAR), KLON, NGPTOTG) + CALL VALIDATEVAR_ATLAS(FSET, FSPACE, TRIM(OUT_VAR_NAMES(IVAR)), KLON, NGPTOTG) ENDDO - CALL VALIDATESTATE_ATLAS(FSET, 'TENDENCY_LOC', KLON, NGPTOTG) + CALL VALIDATESTATE_ATLAS(FSET, FSPACE, 'TENDENCY_LOC', KLON, NGPTOTG) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index dd8470cb..c8049676 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -17,6 +17,8 @@ PROGRAM DWARF_CLOUDSC USE ATLAS_MODULE USE, INTRINSIC :: ISO_C_BINDING +USE ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS_MODULE +USE ATLAS_MULTIFIELD_MODULE IMPLICIT NONE @@ -27,10 +29,9 @@ PROGRAM DWARF_CLOUDSC INTEGER(KIND=JPIM) :: NGPTOTG = 16384 ! Number of grid points (as read from command line) INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) -REAL(c_double), pointer :: tmp3d(:,:,:) -type(atlas_fieldset) :: fset -type(atlas_field) :: field - +TYPE(ATLAS_FIELDSET) :: FSET +TYPE(ATLAS_MULTIFIELD) :: MULTIFIELD_IN, MULTIFIELD_OUT +TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(CLOUDSC_GLOBAL_ATLAS_STATE) :: GLOBAL_ATLAS_STATE INTEGER(KIND=JPIB) :: ENERGY, POWER @@ -69,13 +70,13 @@ PROGRAM DWARF_CLOUDSC FSET = ATLAS_FIELDSET() ! TODO: Create a global memory state from serialized input data -CALL GLOBAL_ATLAS_STATE%LOAD(FSET, NPROMA, NGPTOTG) +CALL GLOBAL_ATLAS_STATE%LOAD(FSET, MULTIFIELD_IN, MULTIFIELD_OUT, FSPACE, NPROMA, NGPTOTG) ! Call the driver to perform the parallel loop over our kernel CALL CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY) ! Validate the output against serialized reference data -CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, NGPTOTG) +CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, FSPACE, NGPTOTG) ! Tear down MPI environment CALL ATLAS_LIBRARY%FINALISE() diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index 16cf8f38..9d191988 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -25,9 +25,10 @@ module expand_atlas_mod contains - subroutine loadvar_atlas(fset, name, nlon, ngptotg) + subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) ! Load into the local memory buffer and expand to global field type(atlas_fieldset), intent(inout) :: fset + type(atlas_functionspace_blockstructuredcolumns), intent(in) :: fspace character(len=*), intent(in) :: name integer(kind=jpim), intent(in) :: nlon integer(kind=jpim), intent(in), optional :: ngptotg @@ -40,7 +41,7 @@ subroutine loadvar_atlas(fset, name, nlon, ngptotg) real(c_double), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) integer(c_int), pointer :: field_i1(:,:) logical, pointer :: field_l1(:,:) - type(atlas_functionspace_blockstructuredcolumns) :: fspace + !type(atlas_functionspace_blockstructuredcolumns) :: fspace logical :: lfield, rfield, ifield field = fset%field(name) @@ -49,7 +50,7 @@ subroutine loadvar_atlas(fset, name, nlon, ngptotg) ifield = (name == "KTYPE") rfield = ((.not. lfield) .and. (.not. ifield)) - fspace = field%functionspace() + !fspace = field%functionspace() nlev = field%levels() nproma = fspace%nproma() ngptot = fspace%size() diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 3ebcebc0..b9fb3c5e 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -23,21 +23,23 @@ MODULE VALIDATE_ATLAS_MOD CONTAINS - SUBROUTINE VALIDATESTATE_ATLAS(FSET, NAME, NLON, NGPTOTG) + SUBROUTINE VALIDATESTATE_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG) TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS), INTENT(IN) :: FSPACE CHARACTER(*), INTENT(IN) :: NAME INTEGER(KIND=JPIM), INTENT(IN) :: NLON INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG - CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "A") - CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "Q") - CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "T") - CALL VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, "CLD") + CALL VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, "A") + CALL VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, "Q") + CALL VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, "T") + CALL VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, "CLD") END SUBROUTINE VALIDATESTATE_ATLAS - SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) + SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) ! Computes and prints errors "in the L1 norm sense" TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS), INTENT(IN) :: FSPACE CHARACTER(*), INTENT(IN) :: NAME INTEGER(KIND=JPIM), INTENT(IN) :: NLON INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG @@ -45,11 +47,10 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) REAL(KIND=JPRB), ALLOCATABLE :: REF_R2(:,:), REF_R3(:,:,:), REF_R4(:,:,:,:) REAL(C_DOUBLE), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) - TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD INTEGER :: B, BSIZE, JL, JK, JM REAL(KIND=JPRB) :: ZMINVAL(1), ZMAX_VAL_ERR(2), ZDIFF, ZSUM_ERR_ABS(2), ZRELERR, ZAVGPGP - INTEGER :: FRANK, NBLOCKS, NLEV, NGPTOT, NPROMA, VAR_ID, NDIM + INTEGER :: FRANK, NBLOCKS, NLEV, NGPTOT, VAR_ID, NDIM, NPROMA CHARACTER(LEN=256) :: FULLNAME IF (PRESENT(STATE_VAR)) THEN @@ -60,7 +61,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) FIELD = FSET%FIELD(NAME) FRANK = FIELD%RANK() - FSPACE = FIELD%FUNCTIONSPACE() NLEV = FIELD%LEVELS() NGPTOT = FSPACE%SIZE() NBLOCKS = FSPACE%NBLKS() @@ -73,7 +73,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) CALL INPUT_INITIALIZE(NAME='reference') IF (FRANK == 2) THEN - CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R1) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) @@ -90,7 +90,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) ENDDO END DO ELSE IF (FRANK == 3) THEN - CALL LOAD_AND_EXPAND(NAME, REF_R3, NLON, FIELD%LEVELS(), NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME, REF_R3, NLON, FIELD%LEVELS(), FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R2) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) @@ -119,7 +119,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) IF (STATE_VAR == 'Q') THEN VAR_ID = 3 ENDIF - CALL LOAD_AND_EXPAND(NAME//'_'//STATE_VAR, REF_R3, NLON, NLEV, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME//'_'//STATE_VAR, REF_R3, NLON, NLEV, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) DO B=1, NBLOCKS @@ -137,7 +137,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) END DO END DO ELSE IF (STATE_VAR == 'CLD') THEN - CALL LOAD_AND_EXPAND(NAME//'_CLD', REF_R4, NLON, NLEV, NDIM, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME//'_CLD', REF_R4, NLON, NLEV, NDIM, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) DO B=1, NBLOCKS From 31cbe4d422aef432cfe088cdaeb5e20911cdcdfa Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 12 Sep 2023 16:49:10 +0200 Subject: [PATCH 075/174] working out numbering for input variables in multifield --- .../cloudsc_global_atlas_state_mod.F90 | 97 ++++++++----------- 1 file changed, 41 insertions(+), 56 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index c4e21af2..1e826398 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -140,48 +140,48 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) ! field = fset%field(1) ! call field%update_device() - CALL FSET%DATA("PLCRIT_AER", SELF%PLCRIT_AER, IBLK) - CALL FSET%DATA("PICRIT_AER", SELF%PICRIT_AER, IBLK) - CALL FSET%DATA("PRE_ICE", SELF%PRE_ICE, IBLK) - CALL FSET%DATA("PCCN", SELF%PCCN, IBLK) - CALL FSET%DATA("PNICE", SELF%PNICE, IBLK) - CALL FSET%DATA("PT", SELF%PT, IBLK) - CALL FSET%DATA("PQ", SELF%PQ, IBLK) - CALL FSET%DATA("PVFA", SELF%PVFA, IBLK) - CALL FSET%DATA("PVFL", SELF%PVFL, IBLK) - CALL FSET%DATA("PVFI", SELF%PVFI, IBLK) - CALL FSET%DATA("PDYNA", SELF%PDYNA, IBLK) - CALL FSET%DATA("PDYNL", SELF%PDYNL, IBLK) - CALL FSET%DATA("PDYNI", SELF%PDYNI, IBLK) - CALL FSET%DATA("PHRSW", SELF%PHRSW, IBLK) - CALL FSET%DATA("PHRLW", SELF%PHRLW, IBLK) - CALL FSET%DATA("PVERVEL", SELF%PVERVEL, IBLK) - CALL FSET%DATA("PAP", SELF%PAP, IBLK) - CALL FSET%DATA("PLU", SELF%PLU, IBLK) - CALL FSET%DATA("PLUDE", SELF%PLUDE, IBLK) - CALL FSET%DATA("PSNDE", SELF%PSNDE, IBLK) - CALL FSET%DATA("PMFU", SELF%PMFU, IBLK) - CALL FSET%DATA("PMFD", SELF%PMFD, IBLK) - CALL FSET%DATA("PA", SELF%PA, IBLK) - CALL FSET%DATA("PSUPSAT", SELF%PSUPSAT, IBLK) - CALL FSET%DATA("PLSM", SELF%PLSM, IBLK) - CALL FSET%DATA("LDCUM", SELF%LDCUM, IBLK) - CALL FSET%DATA("KTYPE", SELF%KTYPE, IBLK) - CALL FSET%DATA("PAPH", SELF%PAPH, IBLK) - CALL FSET%DATA("PEXTRA", SELF%PEXTRA, IBLK) - CALL FSET%DATA("PCLV", SELF%PCLV, IBLK) - - CALL FSET%DATA("TENDENCY_CML", TMP3D, IBLK) + CALL FSET%DATA(1, SELF%PLCRIT_AER, IBLK) + CALL FSET%DATA(2, SELF%PICRIT_AER, IBLK) + CALL FSET%DATA(3, SELF%PRE_ICE, IBLK) + CALL FSET%DATA(4, SELF%PCCN, IBLK) + CALL FSET%DATA(5, SELF%PNICE, IBLK) + CALL FSET%DATA(6, SELF%PT, IBLK) + CALL FSET%DATA(7, SELF%PQ, IBLK) + CALL FSET%DATA(8, SELF%PVFA, IBLK) + CALL FSET%DATA(9, SELF%PVFL, IBLK) + CALL FSET%DATA(10, SELF%PVFI, IBLK) + CALL FSET%DATA(11, SELF%PDYNA, IBLK) + CALL FSET%DATA(12, SELF%PDYNL, IBLK) + CALL FSET%DATA(13, SELF%PDYNI, IBLK) + CALL FSET%DATA(14, SELF%PHRSW, IBLK) + CALL FSET%DATA(15, SELF%PHRLW, IBLK) + CALL FSET%DATA(16, SELF%PVERVEL, IBLK) + CALL FSET%DATA(17, SELF%PAP, IBLK) + CALL FSET%DATA(18, SELF%PLU, IBLK) + CALL FSET%DATA(19, SELF%PLUDE, IBLK) + CALL FSET%DATA(20, SELF%PSNDE, IBLK) + CALL FSET%DATA(21, SELF%PMFU, IBLK) + CALL FSET%DATA(22, SELF%PMFD, IBLK) + CALL FSET%DATA(23, SELF%PA, IBLK) + CALL FSET%DATA(24, SELF%PSUPSAT, IBLK) + CALL FSET%DATA(25, SELF%PLSM, IBLK) + CALL FSET%DATA(26, SELF%LDCUM, IBLK) + CALL FSET%DATA(27, SELF%KTYPE, IBLK) + CALL FSET%DATA(28, SELF%PAPH, IBLK) + CALL FSET%DATA(29, SELF%PEXTRA, IBLK) + CALL FSET%DATA(30, SELF%PCLV, IBLK) + + CALL FSET%DATA(31, TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - CALL FSET%DATA("TENDENCY_TMP", TMP3D, IBLK) + CALL FSET%DATA(32, TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - CALL FSET%DATA("TENDENCY_LOC", TMP3D, IBLK) + CALL FSET%DATA(33, TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) @@ -260,6 +260,9 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PAPH", KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,SELF%KFLDX))) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCLV", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,NCLV))) + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_CML', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_TMP', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) ! output multifield on model interfaces, i.e. LEVELS = FSPACE%LEVELS() + 1 = SELF%KLEV + 1 OUTVAR_SIZE = SIZE(OUT_VAR_NAMES) - 2 ! the last two variables are special and added through FieldSet @@ -273,35 +276,17 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD MULTIFIELD_OUT = atlas_MultiField(CONFIG) CALL FSET%ADD(MULTIFIELD_OUT%FIELDSET()) - !FSET = ATLAS_FIELDSET() - !DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 ! last six variables are special - ! CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(IN_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) - !ENDDO + ! special two output variables + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCOVPTOT", KIND=ATLAS_REAL(JPRB))) + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PRAINFRAC_TOPRFZ", KIND=ATLAS_REAL(JPRB), LEVELS=0)) DO IVAR = 1, SIZE(IN_VAR_NAMES) CALL LOADVAR_ATLAS(FSET, FSPACE, TRIM(IN_VAR_NAMES(IVAR)), KLON, NGPTOTG) ENDDO - - FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_CML', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) - CALL FSET%ADD(FIELD) - FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_TMP', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) - CALL FSET%ADD(FIELD) - FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) - CALL FSET%ADD(FIELD) - - ! The STATE_TYPE arrays are tricky, as the AOSOA layout needs to be expictly - ! unrolled at every step, and we rely on dirty hackery to do this. CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_CML', KLON, NGPTOTG) CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_TMP', KLON, NGPTOTG) - ! Output fields are simply allocated and zero'd - ! - !DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 2 - ! CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) - !ENDDO - CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCOVPTOT", KIND=ATLAS_REAL(JPRB))) - CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PRAINFRAC_TOPRFZ", KIND=ATLAS_REAL(JPRB), LEVELS=0)) - + ! Output fields are simply zero'd DO IVAR = 1, SIZE(OUT_VAR_NAMES) - 1 FIELD = FSET%FIELD(TRIM(OUT_VAR_NAMES(IVAR))) CALL FIELD%DATA(TMP3D) From ee340beb5f84de7bf6600a33598bb472757fb0fc Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 12 Sep 2023 16:51:48 +0200 Subject: [PATCH 076/174] using Field lookup be idx (more performance) whilst using MultiField --- .../cloudsc_global_atlas_state_mod.F90 | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 1e826398..58b9e62c 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -187,22 +187,22 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - CALL FSET%DATA("PFSQLF", SELF%PFSQLF, IBLK) - CALL FSET%DATA("PFSQIF", SELF%PFSQIF, IBLK) - CALL FSET%DATA("PFCQLNG", SELF%PFCQLNG, IBLK) - CALL FSET%DATA("PFCQNNG", SELF%PFCQNNG, IBLK) - CALL FSET%DATA("PFSQRF", SELF%PFSQRF, IBLK) - CALL FSET%DATA("PFSQSF", SELF%PFSQSF, IBLK) - CALL FSET%DATA("PFCQRNG", SELF%PFCQRNG, IBLK) - CALL FSET%DATA("PFCQSNG", SELF%PFCQSNG, IBLK) - CALL FSET%DATA("PFSQLTUR", SELF%PFSQLTUR, IBLK) - CALL FSET%DATA("PFSQITUR", SELF%PFSQITUR, IBLK) - CALL FSET%DATA("PFPLSL", SELF%PFPLSL, IBLK) - CALL FSET%DATA("PFPLSN", SELF%PFPLSN, IBLK) - CALL FSET%DATA("PFHPSL", SELF%PFHPSL, IBLK) - CALL FSET%DATA("PFHPSN", SELF%PFHPSN, IBLK) - CALL FSET%DATA("PCOVPTOT", SELF%PCOVPTOT, IBLK) - CALL FSET%DATA("PRAINFRAC_TOPRFZ", SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FSET%DATA(34, SELF%PFSQLF, IBLK) + CALL FSET%DATA(35, SELF%PFSQIF, IBLK) + CALL FSET%DATA(36, SELF%PFCQLNG, IBLK) + CALL FSET%DATA(37, SELF%PFCQNNG, IBLK) + CALL FSET%DATA(38, SELF%PFSQRF, IBLK) + CALL FSET%DATA(39, SELF%PFSQSF, IBLK) + CALL FSET%DATA(40, SELF%PFCQRNG, IBLK) + CALL FSET%DATA(41, SELF%PFCQSNG, IBLK) + CALL FSET%DATA(42, SELF%PFSQLTUR, IBLK) + CALL FSET%DATA(43, SELF%PFSQITUR, IBLK) + CALL FSET%DATA(44, SELF%PFPLSL, IBLK) + CALL FSET%DATA(45, SELF%PFPLSN, IBLK) + CALL FSET%DATA(46, SELF%PFHPSL, IBLK) + CALL FSET%DATA(47, SELF%PFHPSN, IBLK) + CALL FSET%DATA(48, SELF%PCOVPTOT, IBLK) + CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD_OUT, FSPACE, NPROMA, NGPTOTG) From 961bef8099b7a4f560e0dd738b8c86ff312a7f94 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Thu, 14 Sep 2023 12:56:25 +0200 Subject: [PATCH 077/174] clean up memory leaks of Atlas (with Willem) --- .../cloudsc_driver_mod.F90 | 3 + .../cloudsc_global_atlas_state_mod.F90 | 85 ++++++++++++++----- .../dwarf_cloudsc_atlas.F90 | 6 +- .../expand_atlas_mod.F90 | 3 + .../validate_atlas_mod.F90 | 2 + 5 files changed, 75 insertions(+), 24 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index ba9937f4..6b2b8f4f 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -145,6 +145,9 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) & (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), & & "count:", POWER_COUNT END IF + + CALL FIELD%FINAL() + CALL FSPACE%FINAL() END SUBROUTINE CLOUDSC_DRIVER diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 58b9e62c..88ad0173 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -131,7 +131,7 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CLASS(CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW), INTENT(INOUT) :: SELF - CLASS(ATLAS_FIELDSET), INTENT(INOUT) :: FSET + TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER, INTENT(IN) :: IBLK REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) @@ -205,15 +205,15 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK - SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD_OUT, FSPACE, NPROMA, NGPTOTG) + SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) ! Load reference input data via serialbox CLASS(CLOUDSC_GLOBAL_ATLAS_STATE) :: SELF TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS), INTENT(INOUT) :: FSPACE - TYPE(ATLAS_MULTIFIELD), INTENT(INOUT) :: MULTIFIELD_IN, MULTIFIELD_OUT INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NGPTOTG TYPE(ATLAS_STRUCTUREDGRID) :: GRID + TYPE(ATLAS_MULTIFIELD) :: MULTIFIELD_IN, MULTIFIELD_OUT INTEGER(KIND=JPIM) :: KLON, IVAR, B TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 @@ -224,6 +224,21 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD TYPE(ATLAS_CONFIG), DIMENSION(24) :: IN_MFIELD_CONFIG TYPE(ATLAS_CONFIG), DIMENSION(14) :: OUT_MFIELD_CONFIG INTEGER :: INVAR_SIZE, OUTVAR_SIZE + LOGICAL :: LMULTIFIELD + CHARACTER(len=8) :: CENV + INTEGER :: CENV_LEN + + LMULTIFIELD = .TRUE. + CALL GET_ENVIRONMENT_VARIABLE("CLOUDSC_ATLAS_MULTIFIELD",CENV,CENV_LEN) + IF (CENV_LEN > 0 ) THEN + IF( TRIM(CENV) == "0" .OR. TRIM(CENV) == "OFF" .OR. TRIM(CENV) == "FALSE" ) THEN + LMULTIFIELD = .FALSE. + ENDIF + ENDIF + + IF (IRANK == 0) THEN + PRINT *, " LMULTIFIELD: ", LMULTIFIELD + ENDIF CALL INPUT_INITIALIZE(NAME='input') @@ -242,16 +257,26 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD CALL CONFIG%SET("ngptot", FSPACE%SIZE()) CALL CONFIG%SET("datatype", "real64") - ! input multifield on model levels, i.e. LEVELS = FSPACE%LEVELS() = SELF%KLEV INVAR_SIZE = SIZE(IN_VAR_NAMES) - 6 ! the last six variables are special and added through FieldSet - DO IVAR = 1, INVAR_SIZE - IN_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() - CALL IN_MFIELD_CONFIG(IVAR)%SET("name", TRIM(IN_VAR_NAMES(IVAR))) - END DO - CALL CONFIG%SET("nlev", FSPACE%LEVELS()) - CALL CONFIG%SET("fields", IN_MFIELD_CONFIG) - MULTIFIELD_IN = atlas_MultiField(CONFIG) - CALL FSET%ADD(MULTIFIELD_IN%FIELDSET()) + IF (LMULTIFIELD) THEN + ! input multifield on model levels, i.e. LEVELS = FSPACE%LEVELS() = SELF%KLEV + DO IVAR = 1, INVAR_SIZE + IN_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() + CALL IN_MFIELD_CONFIG(IVAR)%SET("name", TRIM(IN_VAR_NAMES(IVAR))) + END DO + CALL CONFIG%SET("nlev", FSPACE%LEVELS()) + CALL CONFIG%SET("fields", IN_MFIELD_CONFIG) + DO IVAR = 1, INVAR_SIZE + CALL IN_MFIELD_CONFIG(IVAR)%FINAL() + END DO + MULTIFIELD_IN = atlas_MultiField(CONFIG) + CALL FSET%ADD(MULTIFIELD_IN%FIELDSET()) + CALL MULTIFIELD_IN%FINAL() + ELSE + DO IVAR = 1, INVAR_SIZE + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(IN_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) + END DO + ENDIF ! special six input variables CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PLSM", KIND=ATLAS_REAL(JPRB), LEVELS=0)) @@ -264,17 +289,29 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_TMP', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) - ! output multifield on model interfaces, i.e. LEVELS = FSPACE%LEVELS() + 1 = SELF%KLEV + 1 + OUTVAR_SIZE = SIZE(OUT_VAR_NAMES) - 2 ! the last two variables are special and added through FieldSet - DO IVAR = 1, OUTVAR_SIZE - OUT_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() - CALL OUT_MFIELD_CONFIG(IVAR)%SET("name", TRIM(OUT_VAR_NAMES(IVAR))) - CALL OUT_MFIELD_CONFIG(IVAR)%SET("levels", SELF%KLEV+1) - END DO - CALL CONFIG%SET("nlev", 1 + FSPACE%LEVELS()) - CALL CONFIG%SET("fields", OUT_MFIELD_CONFIG) - MULTIFIELD_OUT = atlas_MultiField(CONFIG) - CALL FSET%ADD(MULTIFIELD_OUT%FIELDSET()) + IF (LMULTIFIELD) THEN + ! output multifield on model interfaces, i.e. LEVELS = FSPACE%LEVELS() + 1 = SELF%KLEV + 1 + DO IVAR = 1, OUTVAR_SIZE + OUT_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() + CALL OUT_MFIELD_CONFIG(IVAR)%SET("name", TRIM(OUT_VAR_NAMES(IVAR))) + CALL OUT_MFIELD_CONFIG(IVAR)%SET("levels", SELF%KLEV+1) + ! CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) + END DO + CALL CONFIG%SET("nlev", 1 + FSPACE%LEVELS()) + CALL CONFIG%SET("fields", OUT_MFIELD_CONFIG) + DO IVAR = 1, OUTVAR_SIZE + CALL OUT_MFIELD_CONFIG(IVAR)%FINAL() + END DO + MULTIFIELD_OUT = atlas_MultiField(CONFIG) + CALL FSET%ADD(MULTIFIELD_OUT%FIELDSET()) + CALL MULTIFIELD_OUT%FINAL() + ELSE + DO IVAR = 1, OUTVAR_SIZE + CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) + END DO + ENDIF ! special two output variables CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCOVPTOT", KIND=ATLAS_REAL(JPRB))) @@ -315,6 +352,10 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, MULTIFIELD_IN, MULTIFIELD CALL YREPHLI_LOAD_PARAMETERS() CALL INPUT_FINALIZE() + + CALL FIELD%FINAL() + CALL CONFIG%FINAL() + CALL GRID%FINAL() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index c8049676..188265f4 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -30,7 +30,6 @@ PROGRAM DWARF_CLOUDSC INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) TYPE(ATLAS_FIELDSET) :: FSET -TYPE(ATLAS_MULTIFIELD) :: MULTIFIELD_IN, MULTIFIELD_OUT TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(CLOUDSC_GLOBAL_ATLAS_STATE) :: GLOBAL_ATLAS_STATE @@ -70,7 +69,7 @@ PROGRAM DWARF_CLOUDSC FSET = ATLAS_FIELDSET() ! TODO: Create a global memory state from serialized input data -CALL GLOBAL_ATLAS_STATE%LOAD(FSET, MULTIFIELD_IN, MULTIFIELD_OUT, FSPACE, NPROMA, NGPTOTG) +CALL GLOBAL_ATLAS_STATE%LOAD(FSET, FSPACE, NPROMA, NGPTOTG) ! Call the driver to perform the parallel loop over our kernel CALL CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, GLOBAL_ATLAS_STATE%KFLDX, GLOBAL_ATLAS_STATE%PTSPHY) @@ -78,6 +77,9 @@ PROGRAM DWARF_CLOUDSC ! Validate the output against serialized reference data CALL GLOBAL_ATLAS_STATE%VALIDATE(FSET, FSPACE, NGPTOTG) +CALL FSET%FINAL() +CALL FSPACE%FINAL() + ! Tear down MPI environment CALL ATLAS_LIBRARY%FINALISE() CALL CLOUDSC_MPI_END() diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index 9d191988..deb5facf 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -97,6 +97,7 @@ subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) deallocate(buffer_r3) endif endif + call field%final() end subroutine loadvar_atlas subroutine loadstate_atlas(fset, name, nlon, ngptotg) @@ -135,6 +136,8 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) call expand(buffer(:,:,3), field_r3(:,:,3,:), size, nproma, nlev, ngptot, nblocks) call expand(buffer(:,:,4:), field_r3(:,:,4:,:), size, nproma, nlev, ndim, ngptot, nblocks) deallocate(buffer) + call field%final() + call fspace%final() end subroutine loadstate_atlas end module expand_atlas_mod diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index b9fb3c5e..2308f582 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -177,6 +177,8 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) CALL ERROR_PRINT(FULLNAME, ZMINVAL(1), ZMAX_VAL_ERR(1), ZMAX_VAL_ERR(2), & & ZSUM_ERR_ABS(1), ZSUM_ERR_ABS(2), ZAVGPGP, NDIM=FRANK-1) END IF + + CALL FIELD%FINAL() END SUBROUTINE VALIDATEVAR_ATLAS END MODULE VALIDATE_ATLAS_MOD From dc9b9191077172567bc5e00e8a0cde9fb4ac474c Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Thu, 14 Sep 2023 13:29:32 +0200 Subject: [PATCH 078/174] add Atlas tracing (with Willem) --- src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 | 6 +++++- .../cloudsc_global_atlas_state_mod.F90 | 11 +++++++++-- src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 | 6 +++++- src/cloudsc_fortran_atlas/expand_atlas_mod.F90 | 8 ++++++++ 4 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index 6b2b8f4f..bea46abd 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -45,6 +45,9 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT LOGICAL :: LEC_PMON = .FALSE. CHARACTER(LEN=1) :: CLEC_PMON + TYPE(ATLAS_TRACE) :: TRACE + + TRACE = ATLAS_TRACE("cloudsc_driver_mod.F90", __LINE__, "CLOUDSC_DRIVER","COMPUTE") CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON) IF (CLEC_PMON == '1') LEC_PMON = .TRUE. @@ -148,7 +151,8 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) CALL FIELD%FINAL() CALL FSPACE%FINAL() - + CALL TRACE%FINAL() + END SUBROUTINE CLOUDSC_DRIVER END MODULE CLOUDSC_DRIVER_MOD diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 88ad0173..ff81743c 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -227,6 +227,10 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) LOGICAL :: LMULTIFIELD CHARACTER(len=8) :: CENV INTEGER :: CENV_LEN + TYPE(ATLAS_TRACE) :: TRACE, TRACE_IO + + TRACE = ATLAS_TRACE("cloudsc_global_atlas_state_mod.F90", __LINE__, & + & "CLOUDSC_GLOBAL_ATLAS_STATE_LOAD") LMULTIFIELD = .TRUE. CALL GET_ENVIRONMENT_VARIABLE("CLOUDSC_ATLAS_MULTIFIELD",CENV,CENV_LEN) @@ -251,7 +255,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) SELF%NBLOCKS = FSPACE%NBLKS() ! create multifield - CONFIG = atlas_CONFIG() + CONFIG = ATLAS_CONFIG() CALL CONFIG%SET("type", "MultiFieldCreatorIFS") CALL CONFIG%SET("nproma", NPROMA) CALL CONFIG%SET("ngptot", FSPACE%SIZE()) @@ -356,6 +360,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) CALL FIELD%FINAL() CALL CONFIG%FINAL() CALL GRID%FINAL() + CALL TRACE%FINAL() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) @@ -365,8 +370,10 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS), INTENT(IN) :: FSPACE INTEGER(KIND=JPIM), INTENT(IN) :: NGPTOTG + TYPE(ATLAS_TRACE) :: TRACE INTEGER(KIND=JPIM) :: KLON, IVAR + TRACE = ATLAS_TRACE("cloudsc_global_atlas_state_mod.F90", __LINE__, "VALIDATE", "VALIDATE") CALL INPUT_INITIALIZE(NAME='reference') CALL LOAD_SCALAR('KLON', KLON) print *, "KLON = ", KLON @@ -385,7 +392,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) CALL VALIDATEVAR_ATLAS(FSET, FSPACE, TRIM(OUT_VAR_NAMES(IVAR)), KLON, NGPTOTG) ENDDO CALL VALIDATESTATE_ATLAS(FSET, FSPACE, 'TENDENCY_LOC', KLON, NGPTOTG) - + CALL TRACE%FINAL() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE END MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index 188265f4..ab3574a2 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -32,10 +32,11 @@ PROGRAM DWARF_CLOUDSC TYPE(ATLAS_FIELDSET) :: FSET TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(CLOUDSC_GLOBAL_ATLAS_STATE) :: GLOBAL_ATLAS_STATE - +TYPE(ATLAS_TRACE) :: TRACE INTEGER(KIND=JPIB) :: ENERGY, POWER CHARACTER(LEN=1) :: CLEC_PMON + CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON) IF (CLEC_PMON == '1') THEN CALL EC_PMON(ENERGY, POWER) @@ -53,6 +54,7 @@ PROGRAM DWARF_CLOUDSC ! Initialize MPI environment CALL CLOUDSC_MPI_INIT(NUMOMP) CALL ATLAS_LIBRARY%INITIALISE() +TRACE = ATLAS_TRACE("dwarf_cloudsc_atlas.F90",__LINE__,"program") ! Get total number of grid points (NGPTOTG) with which to run the benchmark IF (IARGS >= 2) THEN @@ -80,6 +82,8 @@ PROGRAM DWARF_CLOUDSC CALL FSET%FINAL() CALL FSPACE%FINAL() +CALL TRACE%FINAL() + ! Tear down MPI environment CALL ATLAS_LIBRARY%FINALISE() CALL CLOUDSC_MPI_END() diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index deb5facf..cc43e1a1 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -43,6 +43,8 @@ subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) logical, pointer :: field_l1(:,:) !type(atlas_functionspace_blockstructuredcolumns) :: fspace logical :: lfield, rfield, ifield + type(atlas_trace) :: trace + trace = atlas_trace("expand_atlas_mod.F90", __LINE__, "loadvar_atlas", "IO") field = fset%field(name) frank = field%rank() @@ -98,6 +100,7 @@ subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) endif endif call field%final() + call trace%final() end subroutine loadvar_atlas subroutine loadstate_atlas(fset, name, nlon, ngptotg) @@ -110,10 +113,13 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) integer(kind=jpim) :: start, end, size, nlev, nproma, ngptot, nblocks, ndim type(atlas_field) :: field type(atlas_functionspace_blockstructuredcolumns) :: fspace + type(atlas_trace) :: trace real(kind=jprb), allocatable :: buffer(:,:,:) real(c_double), pointer :: field_r3(:,:,:,:) + trace = atlas_trace("expand_atlas_mod.F90", __LINE__, "loadstate_atlas", "IO") + field = fset%field(name) fspace = field%functionspace() nlev = field%levels() @@ -135,9 +141,11 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) call expand(buffer(:,:,2), field_r3(:,:,2,:), size, nproma, nlev, ngptot, nblocks) call expand(buffer(:,:,3), field_r3(:,:,3,:), size, nproma, nlev, ngptot, nblocks) call expand(buffer(:,:,4:), field_r3(:,:,4:,:), size, nproma, nlev, ndim, ngptot, nblocks) + deallocate(buffer) call field%final() call fspace%final() + call trace%final() end subroutine loadstate_atlas end module expand_atlas_mod From 43492357c9c3c2ff14241ed1a39725e6b030e3a9 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Thu, 14 Sep 2023 14:55:32 +0000 Subject: [PATCH 079/174] ensure OMP stays activates after Atlas and its dependencies --- bundle.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/bundle.yml b/bundle.yml index b4b2204d..f4a60dc7 100644 --- a/bundle.yml +++ b/bundle.yml @@ -10,6 +10,7 @@ cmake : > BUILD_eckit=OFF BUILD_fckit=OFF BUILD_atlas=OFF + ENABLE_OMP=ON projects : From acc7299bf0d25b44c9f9a2bb7211961769ee7d60 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Thu, 14 Sep 2023 14:56:16 +0000 Subject: [PATCH 080/174] cleanup --- src/cloudsc_fortran_atlas/expand_atlas_mod.F90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index cc43e1a1..3253de1e 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -41,7 +41,6 @@ subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) real(c_double), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) integer(c_int), pointer :: field_i1(:,:) logical, pointer :: field_l1(:,:) - !type(atlas_functionspace_blockstructuredcolumns) :: fspace logical :: lfield, rfield, ifield type(atlas_trace) :: trace trace = atlas_trace("expand_atlas_mod.F90", __LINE__, "loadvar_atlas", "IO") @@ -52,7 +51,6 @@ subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) ifield = (name == "KTYPE") rfield = ((.not. lfield) .and. (.not. ifield)) - !fspace = field%functionspace() nlev = field%levels() nproma = fspace%nproma() ngptot = fspace%size() From 4b1ab64bdc8eaab6bcbcf180e14b8ffd76123191 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Thu, 21 Sep 2023 23:36:51 +0200 Subject: [PATCH 081/174] make compile with Atlas --- src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 5ccb174e..ba2261e4 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -224,8 +224,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) CALL LOAD_SCALAR('KFLDX', SELF%KFLDX) GRID = ATLAS_REGULARLONLATGRID(NGPTOTG,1) - PARTITIONER = ATLAS_PARTITIONER("bands") - FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, PARTITIONER, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) + FSPACE = ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS(GRID, LEVELS=SELF%KLEV, NPROMA=NPROMA, HALO=0) NGPTOT = FSPACE%SIZE() SELF%NBLOCKS = FSPACE%NBLKS() FSET = ATLAS_FIELDSET() From 0b8e593a382309215dbb4d264a12a091677d32c3 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 11:11:58 +0100 Subject: [PATCH 082/174] Increase output verbosity in Actions to debug OpenMP --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 14b2d591..fe0e716f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -137,7 +137,7 @@ jobs: # Build the targets - name: Bundle build run: | - ./cloudsc-bundle build --retry-verbose \ + ./cloudsc-bundle build --verbose --retry-verbose \ --arch=arch/github/ubuntu/${{ matrix.arch }} \ ${{ matrix.prec_flag }} ${{ matrix.io_library_flag }} ${{ matrix.build_flags }} From 27771ef742593a546189d4a338ec15e4c20beb1f Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 11:24:22 +0100 Subject: [PATCH 083/174] Disable atlas for single-precision builds --- .github/workflows/build.yml | 23 +++++++++++++++-------- bundle.yml | 2 +- src/cloudsc_fortran_atlas/CMakeLists.txt | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fe0e716f..4e4188f9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ on: jobs: # This workflow contains a single job called "build" build: - name: Test on ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.prec_flag }} ${{ matrix.build_flags }} + name: ${{ matrix.arch }} ${{ matrix.io_library_flag }} ${{ matrix.build_flags }} # The type of runner that the job will run on runs-on: ubuntu-20.04 @@ -31,12 +31,11 @@ jobs: io_library_flag: ['', '--with-serialbox'] # Switch between Serialbox and HDF5 - prec_flag: ['', '--single-precision'] # Switch single/double precision - build_flags: - '' # Plain build without any options - '--with-gpu --with-loki --with-atlas' # Enable Loki, Atlas, and GPU variants - '--with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, Atlas, and GPU variants with MPI + - '--single-precision --with-gpu --with-loki --with-mpi' # Enable Loki, and GPU variants with MPI in a single-precision build pyiface_flag: [''] # Enable the pyiface variant @@ -48,7 +47,6 @@ jobs: # Add pyiface build configuration for double precision, non-MPI, HDF5 only - arch: gnu/9.4.0 io_library_flag: '' - prec_flag: '' build_flags: '--cloudsc-fortran-pyiface=ON --cloudsc-python-f2py=ON' # Add nvhpc build configurations with serialbox and HDF5 @@ -57,6 +55,11 @@ jobs: io_library_flag: '' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + - arch: nvhpc/21.9 + nvhpc_version: 21.9 + io_library_flag: '' + build_flags: '--single-precision --with-gpu --with-loki --with-cuda' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '--with-serialbox' @@ -68,6 +71,11 @@ jobs: io_library_flag: '' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + - arch: nvhpc/23.5 + nvhpc_version: 23.5 + io_library_flag: '' + build_flags: '--single-precision --with-gpu --with-loki --with-cuda' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '--with-serialbox' @@ -139,19 +147,18 @@ jobs: run: | ./cloudsc-bundle build --verbose --retry-verbose \ --arch=arch/github/ubuntu/${{ matrix.arch }} \ - ${{ matrix.prec_flag }} ${{ matrix.io_library_flag }} ${{ matrix.build_flags }} + ${{ matrix.io_library_flag }} ${{ matrix.build_flags }} # Verify targets exist - name: Verify targets env: io_library_flag: ${{ matrix.io_library_flag }} - prec_flag: ${{ matrix.prec_flag }} build_flags: ${{ matrix.build_flags }} run: .github/scripts/verify-targets.sh # Run ctest - name: Run CTest - if: ${{ matrix.prec_flag == '' }} + if: ${{ !contains(matrix.build_flags, '--single-precision') }} working-directory: ./build run: | source env.sh @@ -160,7 +167,7 @@ jobs: # Upload test output - name: Archive CTest output uses: actions/upload-artifact@v3 - if: ${{ matrix.prec_flag == '' }} + if: ${{ !contains(matrix.build_flags, '--single-precision') }} with: name: ctest-log path: build/ctest.log diff --git a/bundle.yml b/bundle.yml index b4b2204d..057d38e5 100644 --- a/bundle.yml +++ b/bundle.yml @@ -141,7 +141,7 @@ options : CLOUDSC_PYTHON_F2PY=ON - with-atlas : - help : Build Atlas and its dependencies (eckit, fckit) and enable Atlas-based variants of CLOUDSC + help : Build Atlas and its dependencies (eckit, fckit) and enable Atlas-based variants of CLOUDSC (incompatible with --single-precision) cmake : > BUILD_eckit=ON BUILD_fckit=ON diff --git a/src/cloudsc_fortran_atlas/CMakeLists.txt b/src/cloudsc_fortran_atlas/CMakeLists.txt index f1e61123..fe9f98fe 100644 --- a/src/cloudsc_fortran_atlas/CMakeLists.txt +++ b/src/cloudsc_fortran_atlas/CMakeLists.txt @@ -9,7 +9,7 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_FORTRAN_ATLAS DESCRIPTION "Build the Fortran version CLOUDSC using Atlas and Serialbox" DEFAULT ON - CONDITION atlas_FOUND AND (Serialbox_FOUND OR HDF5_FOUND) + CONDITION atlas_FOUND AND (Serialbox_FOUND OR HDF5_FOUND) AND NOT HAVE_SINGLE_PRECISION ) if( HAVE_CLOUDSC_FORTRAN_ATLAS ) From 0e6568c6a105015991a4efac7c19b5ee806045be Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 11:37:34 +0100 Subject: [PATCH 084/174] Enable OpenMP by default and add bundle option to switch off --- bundle.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bundle.yml b/bundle.yml index 057d38e5..c925e01a 100644 --- a/bundle.yml +++ b/bundle.yml @@ -10,6 +10,7 @@ cmake : > BUILD_eckit=OFF BUILD_fckit=OFF BUILD_atlas=OFF + ENABLE_OMP=ON projects : @@ -112,6 +113,10 @@ options : help : Enable MPI-parallel kernel cmake : ENABLE_MPI=ON + - without-openmp : + help : Disable OpenMP + cmake : ENABLE_OMP=OFF + - with-loki : help : Enable Loki source-to-source transformations cmake : > From 484d58c483b9872879c82c19145f74f8bd311381 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 11:55:36 +0100 Subject: [PATCH 085/174] Remove prec_flag from verify-targets.sh --- .github/scripts/verify-targets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index b8726c29..40e29cfa 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -38,7 +38,7 @@ then targets+=(dwarf-cloudsc-loki-idem dwarf-cloudsc-loki-sca) targets+=(dwarf-cloudsc-loki-scc dwarf-cloudsc-loki-scc-hoist) targets+=(dwarf-cloudsc-loki-idem-stack dwarf-cloudsc-loki-scc-stack) - if [[ "$prec_flag" != "--single-precision" ]] + if [[ "$build_flags" != *"--single-precision"* ]] then targets+=(dwarf-cloudsc-loki-c) fi From 74a384859dd5358fa43f3ad7b34809b28fa84934 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 12:50:25 +0100 Subject: [PATCH 086/174] Rename to cloudsc-c-cuda to avoid ecbuild option name clash --- src/cloudsc_cuda/CMakeLists.txt | 84 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 43 deletions(-) diff --git a/src/cloudsc_cuda/CMakeLists.txt b/src/cloudsc_cuda/CMakeLists.txt index 15565d81..3112b8b9 100644 --- a/src/cloudsc_cuda/CMakeLists.txt +++ b/src/cloudsc_cuda/CMakeLists.txt @@ -7,24 +7,23 @@ # nor does it submit to any jurisdiction. # Define this dwarf variant as an ECBuild feature -ecbuild_add_option( FEATURE CLOUDSC_CUDA - DESCRIPTION "Build the CUDA version CLOUDSC using Serialbox" DEFAULT ON +ecbuild_add_option( FEATURE CLOUDSC_C_CUDA + DESCRIPTION "Build the CUDA version of CLOUDSC C using Serialbox" DEFAULT ON CONDITION Serialbox_FOUND AND HAVE_CUDA ) if( HAVE_CLOUDSC_CUDA ) - enable_language(CUDA) enable_language(CXX) ###### SCC-CUDA #### ecbuild_add_library( - TARGET dwarf-cloudsc-cuda-lib - INSTALL_HEADERS LISTED + TARGET dwarf-cloudsc-c-cuda-lib + INSTALL_HEADERS LISTED SOURCES - cloudsc/yoecldp_c.h - cloudsc/load_state.h + cloudsc/yoecldp_c.h + cloudsc/load_state.h cloudsc/load_state.cu cloudsc/cloudsc_c.h cloudsc/cloudsc_c.cu @@ -43,30 +42,30 @@ if( HAVE_CLOUDSC_CUDA ) ) target_include_directories( - dwarf-cloudsc-cuda-lib - PUBLIC - ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} + dwarf-cloudsc-c-cuda-lib + PUBLIC + ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ) if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - target_compile_options(dwarf-cloudsc-cuda-lib PRIVATE $<$>) + target_compile_options(dwarf-cloudsc-c-cuda-lib PRIVATE $<$>) else() - target_compile_options(dwarf-cloudsc-cuda-lib PRIVATE $<$: - -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>) + target_compile_options(dwarf-cloudsc-c-cuda-lib PRIVATE $<$: + -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>) endif() - set_target_properties( dwarf-cloudsc-cuda-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON) - + set_target_properties( dwarf-cloudsc-c-cuda-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + ecbuild_add_executable( - TARGET dwarf-cloudsc-cuda + TARGET dwarf-cloudsc-c-cuda SOURCES dwarf_cloudsc.cpp - LIBS dwarf-cloudsc-cuda-lib + LIBS dwarf-cloudsc-c-cuda-lib ) - target_link_libraries(dwarf-cloudsc-cuda dwarf-cloudsc-cuda-lib) + target_link_libraries(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-lib) ecbuild_add_test( - TARGET dwarf-cloudsc-cuda-serial - COMMAND bin/dwarf-cloudsc-cuda + TARGET dwarf-cloudsc-c-cuda-serial + COMMAND bin/dwarf-cloudsc-c-cuda ARGS 1 1000 128 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 @@ -75,7 +74,7 @@ if( HAVE_CLOUDSC_CUDA ) ###### SCC-CUDA-HOIST #### ecbuild_add_library( - TARGET dwarf-cloudsc-cuda-hoist-lib + TARGET dwarf-cloudsc-c-cuda-hoist-lib INSTALL_HEADERS LISTED SOURCES cloudsc/yoecldp_c.h @@ -98,30 +97,30 @@ if( HAVE_CLOUDSC_CUDA ) ) target_include_directories( - dwarf-cloudsc-cuda-hoist-lib + dwarf-cloudsc-c-cuda-hoist-lib PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ) if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - target_compile_options(dwarf-cloudsc-cuda-hoist-lib PRIVATE $<$>) + target_compile_options(dwarf-cloudsc-c-cuda-hoist-lib PRIVATE $<$>) else() - target_compile_options(dwarf-cloudsc-cuda-hoist-lib PRIVATE $<$: + target_compile_options(dwarf-cloudsc-c-cuda-hoist-lib PRIVATE $<$: -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>) endif() - set_target_properties( dwarf-cloudsc-cuda-hoist-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + set_target_properties( dwarf-cloudsc-c-cuda-hoist-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON) ecbuild_add_executable( - TARGET dwarf-cloudsc-cuda-hoist + TARGET dwarf-cloudsc-c-cuda-hoist SOURCES dwarf_cloudsc.cpp - LIBS dwarf-cloudsc-cuda-hoist-lib + LIBS dwarf-cloudsc-c-cuda-hoist-lib ) - target_link_libraries(dwarf-cloudsc-cuda-hoist dwarf-cloudsc-cuda-hoist-lib) - + target_link_libraries(dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-hoist-lib) + ecbuild_add_test( - TARGET dwarf-cloudsc-cuda-hoist-serial - COMMAND bin/dwarf-cloudsc-cuda-hoist + TARGET dwarf-cloudsc-c-cuda-hoist-serial + COMMAND bin/dwarf-cloudsc-c-cuda-hoist ARGS 1 1000 128 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 @@ -130,7 +129,7 @@ if( HAVE_CLOUDSC_CUDA ) ###### SCC-CUDA-K-CACHING #### ecbuild_add_library( - TARGET dwarf-cloudsc-cuda-k-caching-lib + TARGET dwarf-cloudsc-c-cuda-k-caching-lib INSTALL_HEADERS LISTED SOURCES cloudsc/yoecldp_c.h @@ -153,29 +152,29 @@ if( HAVE_CLOUDSC_CUDA ) ) target_include_directories( - dwarf-cloudsc-cuda-k-caching-lib + dwarf-cloudsc-c-cuda-k-caching-lib PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ) if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - target_compile_options(dwarf-cloudsc-cuda-k-caching-lib PRIVATE $<$>) + target_compile_options(dwarf-cloudsc-c-cuda-k-caching-lib PRIVATE $<$>) else() - target_compile_options(dwarf-cloudsc-cuda-k-caching-lib PRIVATE $<$: + target_compile_options(dwarf-cloudsc-c-cuda-k-caching-lib PRIVATE $<$: -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>) endif() - set_target_properties( dwarf-cloudsc-cuda-k-caching-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + set_target_properties( dwarf-cloudsc-c-cuda-k-caching-lib PROPERTIES CUDA_SEPARABLE_COMPILATION ON) ecbuild_add_executable( - TARGET dwarf-cloudsc-cuda-k-caching + TARGET dwarf-cloudsc-c-cuda-k-caching SOURCES dwarf_cloudsc.cpp - LIBS dwarf-cloudsc-cuda-k-caching-lib + LIBS dwarf-cloudsc-c-cuda-k-caching-lib ) - target_link_libraries(dwarf-cloudsc-cuda-k-caching dwarf-cloudsc-cuda-k-caching-lib) - + target_link_libraries(dwarf-cloudsc-c-cuda-k-caching dwarf-cloudsc-c-cuda-k-caching-lib) + ecbuild_add_test( - TARGET dwarf-cloudsc-cuda-k-caching-serial - COMMAND bin/dwarf-cloudsc-cuda-k-caching + TARGET dwarf-cloudsc-c-cuda-k-caching-serial + COMMAND bin/dwarf-cloudsc-c-cuda-k-caching ARGS 1 1000 128 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 @@ -189,4 +188,3 @@ if( HAVE_CLOUDSC_CUDA ) else() ecbuild_info( "Serialbox and/or CUDA not found, disabling CUDA prototype(s)" ) endif() - From 6d86b95772d3340124b8b6ec959d2daf55f8892f Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 13:08:26 +0100 Subject: [PATCH 087/174] Use latest releases for ecbuild/eckit/fckit/atlas --- bundle.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bundle.yml b/bundle.yml index c925e01a..3b30e8c1 100644 --- a/bundle.yml +++ b/bundle.yml @@ -16,7 +16,7 @@ projects : - ecbuild : git : https://github.com/ecmwf/ecbuild - version : 3.7.0 + version : 3.8.0 bundle : false - serialbox : @@ -43,7 +43,7 @@ projects : - eckit : git : https://github.com/ecmwf/eckit - version : develop + version : 1.24.4 optional: true require : ecbuild cmake : > @@ -52,7 +52,7 @@ projects : - fckit : git : https://github.com/ecmwf/fckit - version : develop + version : 0.11.0 optional: true require : ecbuild eckit cmake : > @@ -60,7 +60,7 @@ projects : - atlas : git : https://github.com/ecmwf/atlas - version : feature/MultiField + version : 0.34.0 optional: true require : ecbuild eckit fckit cmake : > From 6527babe7ab01f034b6a196685478887f1068923 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 13:57:43 +0100 Subject: [PATCH 088/174] Fix CUDA options in bundle --- bundle.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/bundle.yml b/bundle.yml index 3b30e8c1..a96bc35a 100644 --- a/bundle.yml +++ b/bundle.yml @@ -101,10 +101,9 @@ options : ENABLE_CLOUDSC_GPU_OMP_SCC_HOIST=ON - with-cuda : - help : Enable GPU kernel variant based on CUDA-Fortran + help : Enable GPU kernel variants based on CUDA and CUDA-Fortran cmake : > - ENABLE_FIELD_API_CUDA=ON - ENABLE_CLOUDSC_CUDA=ON + ENABLE_CUDA=ON ENABLE_CLOUDSC_GPU_SCC_CUF=ON ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON BUILD_field_api=ON @@ -180,10 +179,6 @@ options : help : Build the C version of CLOUDSC [ON|OFF] cmake : ENABLE_CLOUDSC_C={{value}} - - cloudsc-cuda : - help : Build the CUDA C version of CLOUDSC [ON|OFF] - cmake : ENABLE_CLOUDSC_CUDA={{value}} - - cloudsc-gpu-claw : help : Build the deprecated CLAW-based GPU version CLOUDSC [ON|OFF] cmake : ENABLE_CLOUDSC_GPU_CLAW={{value}} From 6780b0e51abab59459e86767cb7fdbb9c93a44f3 Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Fri, 22 Sep 2023 15:02:46 +0200 Subject: [PATCH 089/174] Fix compilation with atlas 0.34.0 --- src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 | 3 ++- src/cloudsc_fortran_atlas/expand_atlas_mod.F90 | 6 ++++-- src/cloudsc_fortran_atlas/validate_atlas_mod.F90 | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index ba9937f4..690f9fb7 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -55,7 +55,8 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) FIELD = FSET%FIELD("PEXTRA") FSPACE = FIELD%FUNCTIONSPACE() - NPROMA = FSPACE%NPROMA() + !NPROMA = FSPACE%NPROMA() + NPROMA = FIELD%SHAPE(1) NLEV = FSPACE%LEVELS() NGPTOT = FSPACE%SIZE() diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index 16cf8f38..ec91ddf2 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -51,7 +51,8 @@ subroutine loadvar_atlas(fset, name, nlon, ngptotg) fspace = field%functionspace() nlev = field%levels() - nproma = fspace%nproma() + !nproma = fspace%nproma() + nproma = field%shape(1) ngptot = fspace%size() nblocks = fspace%nblks() @@ -116,7 +117,8 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) fspace = field%functionspace() nlev = field%levels() ngptot = fspace%size() - nproma = fspace%nproma() + !nproma = fspace%nproma() + nproma = field%shape(1) nblocks = fspace%nblks() ndim = field%shape(3) - 3 diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 3ebcebc0..9a9f8e6f 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -64,7 +64,8 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) NLEV = FIELD%LEVELS() NGPTOT = FSPACE%SIZE() NBLOCKS = FSPACE%NBLKS() - NPROMA = FSPACE%NPROMA() + !NPROMA = FSPACE%NPROMA() + NPROMA = FIELD%SHAPE(1) ZMINVAL(1) = +HUGE(ZMINVAL(1)) ZMAX_VAL_ERR(1) = -HUGE(ZMAX_VAL_ERR(1)) From 32be5227d1cb22e35c4c966138610c413576cd05 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 14:16:59 +0100 Subject: [PATCH 090/174] Use project-specific CUDA option --- bundle.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bundle.yml b/bundle.yml index a96bc35a..ffe1b0fa 100644 --- a/bundle.yml +++ b/bundle.yml @@ -49,6 +49,7 @@ projects : cmake : > ECKIT_ENABLE_TESTS=OFF ECKIT_ENABLE_BUILD_TOOLS=OFF + ECKIT_ENABLE_CUDA=OFF - fckit : git : https://github.com/ecmwf/fckit @@ -65,6 +66,7 @@ projects : require : ecbuild eckit fckit cmake : > ATLAS_ENABLE_TESTS=OFF + ATLAS_ENABLE_CUDA=OFF - field_api : git : ${BITBUCKET}/rdx/field_api From 692a98c948ce5282805ad24d7d9a9a2580b6e38b Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Sep 2023 15:38:08 +0100 Subject: [PATCH 091/174] Remove obsolete CMake workaround --- arch/toolchains/github-ubuntu-nvhpc.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/toolchains/github-ubuntu-nvhpc.cmake b/arch/toolchains/github-ubuntu-nvhpc.cmake index be437031..deda8b6f 100644 --- a/arch/toolchains/github-ubuntu-nvhpc.cmake +++ b/arch/toolchains/github-ubuntu-nvhpc.cmake @@ -19,13 +19,7 @@ set( ECBUILD_FIND_MPI ON ) # Note: OpenMP_Fortran_FLAGS gets overwritten by the FindOpenMP module # unless its stored as a cache variable set( OpenMP_Fortran_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) - -# Note: OpenMP_C_FLAGS and OpenMP_C_LIB_NAMES have to be provided _both_ to -# keep FindOpenMP from overwriting the FLAGS variable (the cache entry alone -# doesn't have any effect here as the module uses FORCE to overwrite the -# existing value) set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) -set( OpenMP_C_LIB_NAMES "acchost" CACHE STRING "") #################################################################### # OpenAcc FLAGS From 9931f882109bddf74dbfb877f5c1d4e7bcc8f4c7 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 25 Sep 2023 16:07:56 +0100 Subject: [PATCH 092/174] Fix binary names for CUDA variants in verify-targets.sh --- .github/scripts/verify-targets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 40e29cfa..41c8caee 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -28,7 +28,7 @@ then targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching) if [[ "$io_library_flag" == "--with-serialbox" ]] then - targets+=(dwarf-cloudsc-cuda dwarf-cloudsc-cuda-hoist dwarf-cloudsc-cuda-k-caching) + targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching) fi fi fi From bba1dc3f6e7e1dbd69fc787d22cf5ded7141e87f Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Mon, 25 Sep 2023 17:26:48 +0100 Subject: [PATCH 093/174] Fix NVHPC builds --- .github/workflows/build.yml | 12 ++++++------ arch/github/ubuntu/nvhpc/21.9/env.sh | 4 +++- arch/github/ubuntu/nvhpc/23.5/env.sh | 4 +++- src/cloudsc_cuda/CMakeLists.txt | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4e4188f9..b881a63f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -54,33 +54,33 @@ jobs: nvhpc_version: 21.9 io_library_flag: '' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' build_flags: '--single-precision --with-gpu --with-loki --with-cuda' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '--with-serialbox' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' build_flags: '--single-precision --with-gpu --with-loki --with-cuda' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '--with-serialbox' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE # Steps represent a sequence of tasks that will be executed as part of the job steps: diff --git a/arch/github/ubuntu/nvhpc/21.9/env.sh b/arch/github/ubuntu/nvhpc/21.9/env.sh index de7afa2f..6e05f756 100644 --- a/arch/github/ubuntu/nvhpc/21.9/env.sh +++ b/arch/github/ubuntu/nvhpc/21.9/env.sh @@ -11,16 +11,18 @@ ### Variables export NVHPC_INSTALL_DIR=${GITHUB_WORKSPACE}/nvhpc-install export NVHPC_VERSION=21.9 +export CUDA_VERSION=11.4 export NVHPC_DIR=${NVHPC_INSTALL_DIR}/Linux_x86_64/${NVHPC_VERSION} ### Compilers export PATH=${NVHPC_DIR}/compilers/bin:${PATH} export NVHPC_LIBRARY_PATH=${NVHPC_DIR}/compilers/lib -export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH} +export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH}:${LD_LIBRARY_PATH} ### MPI export MPI_HOME=${NVHPC_DIR}/comm_libs/mpi export PATH=${MPI_HOME}/bin:${PATH} +export LD_LIBRARY_PATH=${NVHPC_DIR}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib/stubs:${LD_LIBRARY_PATH} ### HDF5 export HDF5_DIR=${GITHUB_WORKSPACE}/hdf5-install diff --git a/arch/github/ubuntu/nvhpc/23.5/env.sh b/arch/github/ubuntu/nvhpc/23.5/env.sh index dfbac6e0..a2261c25 100644 --- a/arch/github/ubuntu/nvhpc/23.5/env.sh +++ b/arch/github/ubuntu/nvhpc/23.5/env.sh @@ -11,16 +11,18 @@ ### Variables export NVHPC_INSTALL_DIR=${GITHUB_WORKSPACE}/nvhpc-install export NVHPC_VERSION=23.5 +export CUDA_VERSION=12.1 export NVHPC_DIR=${NVHPC_INSTALL_DIR}/Linux_x86_64/${NVHPC_VERSION} ### Compilers export PATH=${NVHPC_DIR}/compilers/bin:${PATH} export NVHPC_LIBRARY_PATH=${NVHPC_DIR}/compilers/lib -export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH} +export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH}:${LD_LIBRARY_PATH} ### MPI export MPI_HOME=${NVHPC_DIR}/comm_libs/mpi export PATH=${MPI_HOME}/bin:${PATH} +export LD_LIBRARY_PATH=${NVHPC_DIR}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib/stubs:${LD_LIBRARY_PATH} ### HDF5 export HDF5_DIR=${GITHUB_WORKSPACE}/hdf5-install diff --git a/src/cloudsc_cuda/CMakeLists.txt b/src/cloudsc_cuda/CMakeLists.txt index 3112b8b9..5393d3e6 100644 --- a/src/cloudsc_cuda/CMakeLists.txt +++ b/src/cloudsc_cuda/CMakeLists.txt @@ -12,7 +12,7 @@ ecbuild_add_option( FEATURE CLOUDSC_C_CUDA CONDITION Serialbox_FOUND AND HAVE_CUDA ) -if( HAVE_CLOUDSC_CUDA ) +if( HAVE_CLOUDSC_C_CUDA ) enable_language(CUDA) enable_language(CXX) From 83785c7c34cfe84590bef1dbb753e20574c110d5 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 14:40:42 +0000 Subject: [PATCH 094/174] make serialbox work in the Atlas implementation --- .../cloudsc_driver_mod.F90 | 2 +- .../cloudsc_global_atlas_state_mod.F90 | 56 ++++++++----------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index 690f9fb7..17915207 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -53,7 +53,7 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) POWER_TOTAL = 0_JPIB POWER_COUNT = 0_JPIB - FIELD = FSET%FIELD("PEXTRA") + FIELD = FSET%FIELD(1) FSPACE = FIELD%FUNCTIONSPACE() !NPROMA = FSPACE%NPROMA() NPROMA = FIELD%SHAPE(1) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index ba2261e4..80ce6b96 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -34,12 +34,12 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(C_DOUBLE), POINTER :: PTR(:,:) END TYPE - CHARACTER(LEN=10), PARAMETER, DIMENSION(30) :: IN_VAR_NAMES = (/ & + CHARACTER(LEN=10), PARAMETER, DIMENSION(29) :: IN_VAR_NAMES = (/ & "PLCRIT_AER", "PICRIT_AER", "PRE_ICE ", "PCCN ", "PNICE ", "PT ", "PQ ", & "PVFA ", "PVFL ", "PVFI ", "PDYNA ", "PDYNL ", "PDYNI ", "PHRSW ", & "PHRLW ", "PVERVEL ", "PAP ", "PLU ", "PLUDE ", "PSNDE ", "PMFU ", & "PMFD ", "PA ", "PSUPSAT ", & - "PLSM ", "LDCUM ", "KTYPE ", "PAPH ", "PEXTRA ", "PCLV " /) + "PLSM ", "LDCUM ", "KTYPE ", "PAPH ", "PCLV " /) CHARACTER(LEN=16), PARAMETER, DIMENSION(16) :: OUT_VAR_NAMES = (/ & "PFSQLF ", "PFSQIF ", "PFCQLNG ", "PFCQNNG ", "PFSQRF ", & "PFSQSF ", "PFCQRNG ", "PFCQSNG ", "PFSQLTUR ", "PFSQITUR ", & @@ -80,7 +80,6 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD LOGICAL, POINTER, CONTIGUOUS :: LDCUM(:) ! Convection active INTEGER, POINTER, CONTIGUOUS :: KTYPE(:) ! Convection type 0,1,2 REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PEXTRA(:,:,:) ! extra fields REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCLV(:,:,:) TYPE(STATE_TYPE) :: TENDENCY_CML ! cumulative tendency used for final output @@ -163,41 +162,40 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CALL FSET%DATA(26, SELF%LDCUM, IBLK) CALL FSET%DATA(27, SELF%KTYPE, IBLK) CALL FSET%DATA(28, SELF%PAPH, IBLK) - CALL FSET%DATA(29, SELF%PEXTRA, IBLK) - CALL FSET%DATA(30, SELF%PCLV, IBLK) + CALL FSET%DATA(29, SELF%PCLV, IBLK) - CALL FSET%DATA(31, TMP3D, IBLK) + CALL FSET%DATA(30, TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(32, TMP3D, IBLK) + CALL FSET%DATA(31, TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(33, TMP3D, IBLK) + CALL FSET%DATA(32, TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(34, SELF%PFSQLF, IBLK) - CALL FSET%DATA(35, SELF%PFSQIF, IBLK) - CALL FSET%DATA(36, SELF%PFCQLNG, IBLK) - CALL FSET%DATA(37, SELF%PFCQNNG, IBLK) - CALL FSET%DATA(38, SELF%PFSQRF, IBLK) - CALL FSET%DATA(39, SELF%PFSQSF, IBLK) - CALL FSET%DATA(40, SELF%PFCQRNG, IBLK) - CALL FSET%DATA(41, SELF%PFCQSNG, IBLK) - CALL FSET%DATA(42, SELF%PFSQLTUR, IBLK) - CALL FSET%DATA(43, SELF%PFSQITUR, IBLK) - CALL FSET%DATA(44, SELF%PFPLSL, IBLK) - CALL FSET%DATA(45, SELF%PFPLSN, IBLK) - CALL FSET%DATA(46, SELF%PFHPSL, IBLK) - CALL FSET%DATA(47, SELF%PFHPSN, IBLK) - CALL FSET%DATA(48, SELF%PCOVPTOT, IBLK) - CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FSET%DATA(33, SELF%PFSQLF, IBLK) + CALL FSET%DATA(34, SELF%PFSQIF, IBLK) + CALL FSET%DATA(35, SELF%PFCQLNG, IBLK) + CALL FSET%DATA(36, SELF%PFCQNNG, IBLK) + CALL FSET%DATA(37, SELF%PFSQRF, IBLK) + CALL FSET%DATA(38, SELF%PFSQSF, IBLK) + CALL FSET%DATA(39, SELF%PFCQRNG, IBLK) + CALL FSET%DATA(40, SELF%PFCQSNG, IBLK) + CALL FSET%DATA(41, SELF%PFSQLTUR, IBLK) + CALL FSET%DATA(42, SELF%PFSQITUR, IBLK) + CALL FSET%DATA(43,SELF%PFPLSL, IBLK) + CALL FSET%DATA(44, SELF%PFPLSN, IBLK) + CALL FSET%DATA(45, SELF%PFHPSL, IBLK) + CALL FSET%DATA(46, SELF%PFHPSN, IBLK) + CALL FSET%DATA(47, SELF%PCOVPTOT, IBLK) + CALL FSET%DATA(48, SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) @@ -229,14 +227,14 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) SELF%NBLOCKS = FSPACE%NBLKS() FSET = ATLAS_FIELDSET() - DO IVAR = 1, SIZE(IN_VAR_NAMES) - 6 ! last six variables are special + DO IVAR = 1, SIZE(IN_VAR_NAMES) - 5 ! the last variables are special CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(IN_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) ENDDO CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PLSM", KIND=ATLAS_REAL(JPRB), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="LDCUM", KIND=ATLAS_LOGICAL(), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="KTYPE", KIND=ATLAS_INTEGER(JPIM), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PAPH", KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) - CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,SELF%KFLDX))) + !CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=SELF%KFLDX)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCLV", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,NCLV))) DO IVAR = 1, SIZE(IN_VAR_NAMES) @@ -250,8 +248,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) FIELD = FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV) CALL FSET%ADD(FIELD) - ! The STATE_TYPE arrays are tricky, as the AOSOA layout needs to be expictly - ! unrolled at every step, and we rely on dirty hackery to do this. CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_CML', KLON, NGPTOTG) CALL LOADSTATE_ATLAS(FSET, 'TENDENCY_TMP', KLON, NGPTOTG) ! Output fields are simply allocated and zero'd @@ -270,10 +266,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) END DO !$omp end parallel do ENDDO - ! DEBUG - !FIELD = FSET%FIELD("PAP") - !call field%data(tmp3d) - !print *, MINVAL(tmp3d), MAXVAL(tmp3d) FIELD = FSET%FIELD("PRAINFRAC_TOPRFZ") CALL FIELD%DATA(TMP2D) From 261a577920308370536e9368e33a4468b2896e57 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 14:54:39 +0000 Subject: [PATCH 095/174] support single precision in expand_atlas_mod --- src/cloudsc_fortran_atlas/CMakeLists.txt | 2 +- src/cloudsc_fortran_atlas/expand_atlas_mod.F90 | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cloudsc_fortran_atlas/CMakeLists.txt b/src/cloudsc_fortran_atlas/CMakeLists.txt index fe9f98fe..f1e61123 100644 --- a/src/cloudsc_fortran_atlas/CMakeLists.txt +++ b/src/cloudsc_fortran_atlas/CMakeLists.txt @@ -9,7 +9,7 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_FORTRAN_ATLAS DESCRIPTION "Build the Fortran version CLOUDSC using Atlas and Serialbox" DEFAULT ON - CONDITION atlas_FOUND AND (Serialbox_FOUND OR HDF5_FOUND) AND NOT HAVE_SINGLE_PRECISION + CONDITION atlas_FOUND AND (Serialbox_FOUND OR HDF5_FOUND) ) if( HAVE_CLOUDSC_FORTRAN_ATLAS ) diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index ec91ddf2..435b88f1 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -19,7 +19,7 @@ module expand_atlas_mod use file_io_mod, only: input_initialize, load_scalar, load_array use expand_mod, only: get_offsets, expand - use, intrinsic :: iso_c_binding, only : c_int, c_double + use, intrinsic :: iso_c_binding, only : c_int implicit none @@ -37,7 +37,7 @@ subroutine loadvar_atlas(fset, name, nlon, ngptotg) real(kind=jprb), allocatable :: buffer_r1(:), buffer_r2(:,:), buffer_r3(:,:,:) integer(kind=jpim), allocatable :: buffer_i1(:) logical, allocatable :: buffer_l1(:) - real(c_double), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) + real(kind=jprb), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) integer(c_int), pointer :: field_i1(:,:) logical, pointer :: field_l1(:,:) type(atlas_functionspace_blockstructuredcolumns) :: fspace @@ -111,7 +111,7 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) type(atlas_functionspace_blockstructuredcolumns) :: fspace real(kind=jprb), allocatable :: buffer(:,:,:) - real(c_double), pointer :: field_r3(:,:,:,:) + real(kind=jprb), pointer :: field_r3(:,:,:,:) field = fset%field(name) fspace = field%functionspace() From ebe59028a236079edb06707e8c3571fbe0348cd2 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 14:58:13 +0000 Subject: [PATCH 096/174] add single precision tests for Atlas --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b881a63f..31f74c08 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,7 +35,7 @@ jobs: - '' # Plain build without any options - '--with-gpu --with-loki --with-atlas' # Enable Loki, Atlas, and GPU variants - '--with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, Atlas, and GPU variants with MPI - - '--single-precision --with-gpu --with-loki --with-mpi' # Enable Loki, and GPU variants with MPI in a single-precision build + - '--single-precision --with-gpu --with-loki --with-atlas --with-mpi' # Enable Loki, and GPU variants with MPI in a single-precision build pyiface_flag: [''] # Enable the pyiface variant @@ -58,7 +58,7 @@ jobs: - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' - build_flags: '--single-precision --with-gpu --with-loki --with-cuda' + build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-atlas' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 @@ -74,7 +74,7 @@ jobs: - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' - build_flags: '--single-precision --with-gpu --with-loki --with-cuda' + build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-atlas' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 From 74c753b45dda0ec658adfca376125c46ff4c5115 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 15:19:46 +0000 Subject: [PATCH 097/174] fixed for single precision in Atlas structures --- .../cloudsc_global_atlas_state_mod.F90 | 98 +++++++++---------- .../dwarf_cloudsc_atlas.F90 | 1 - .../validate_atlas_mod.F90 | 2 +- 3 files changed, 50 insertions(+), 51 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 80ce6b96..527a57a1 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -28,10 +28,10 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD IMPLICIT NONE TYPE VAR3D_PTR - REAL(C_DOUBLE), POINTER :: PTR(:,:,:) + REAL(KIND=JPRB), POINTER :: PTR(:,:,:) END TYPE TYPE VAR2D_PTR - REAL(C_DOUBLE), POINTER :: PTR(:,:) + REAL(KIND=JPRB), POINTER :: PTR(:,:) END TYPE CHARACTER(LEN=10), PARAMETER, DIMENSION(29) :: IN_VAR_NAMES = (/ & @@ -51,58 +51,58 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD TYPE(VAR2D_PTR), DIMENSION(15) :: OUT_VARS_2D_REAL64 ! Input field variables and tendencies - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLCRIT_AER(:,:) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PICRIT_AER(:,:) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRE_ICE(:,:) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCCN(:,:) ! liquid cloud condensation nuclei - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PNICE(:,:) ! ice number concentration (cf. CCN) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PT(:,:) ! T at start of callpar - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PQ(:,:) ! Q at start of callpar - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFA(:,:) ! CC from VDF scheme - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFL(:,:) ! Liq from VDF scheme - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFI(:,:) ! Ice from VDF scheme - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNA(:,:) ! CC from Dynamics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNL(:,:) ! Liq from Dynamics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNI(:,:) ! Liq from Dynamics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PHRSW(:,:) ! Short-wave heating rate - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PHRLW(:,:) ! Long-wave heating rate - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVERVEL(:,:) ! Vertical velocity - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAP(:,:) ! Pressure on full levels - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLU(:,:) ! Conv. condensate - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLUDE(:,:) ! Conv. detrained water - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PSNDE(:,:) ! Conv. detrained snow - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PMFU(:,:) ! Conv. mass flux up - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PMFD(:,:) ! Conv. mass flux down - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PA(:,:) ! Original Cloud fraction (t) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PSUPSAT(:,:) - - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLSM(:) ! Land fraction (0-1) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLCRIT_AER(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PICRIT_AER(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PRE_ICE(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCCN(:,:) ! liquid cloud condensation nuclei + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PNICE(:,:) ! ice number concentration (cf. CCN) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PT(:,:) ! T at start of callpar + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PQ(:,:) ! Q at start of callpar + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVFA(:,:) ! CC from VDF scheme + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVFL(:,:) ! Liq from VDF scheme + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVFI(:,:) ! Ice from VDF scheme + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PDYNA(:,:) ! CC from Dynamics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PDYNL(:,:) ! Liq from Dynamics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PDYNI(:,:) ! Liq from Dynamics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PHRSW(:,:) ! Short-wave heating rate + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PHRLW(:,:) ! Long-wave heating rate + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVERVEL(:,:) ! Vertical velocity + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PAP(:,:) ! Pressure on full levels + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLU(:,:) ! Conv. condensate + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLUDE(:,:) ! Conv. detrained water + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PSNDE(:,:) ! Conv. detrained snow + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PMFU(:,:) ! Conv. mass flux up + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PMFD(:,:) ! Conv. mass flux down + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PA(:,:) ! Original Cloud fraction (t) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PSUPSAT(:,:) + + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLSM(:) ! Land fraction (0-1) LOGICAL, POINTER, CONTIGUOUS :: LDCUM(:) ! Convection active INTEGER, POINTER, CONTIGUOUS :: KTYPE(:) ! Convection type 0,1,2 - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCLV(:,:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCLV(:,:,:) TYPE(STATE_TYPE) :: TENDENCY_CML ! cumulative tendency used for final output TYPE(STATE_TYPE) :: TENDENCY_TMP ! cumulative tendency used as input TYPE(STATE_TYPE) :: TENDENCY_LOC ! local tendency from cloud scheme ! Output fields used for validation - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLF(:,:) ! Flux of liquid - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQIF(:,:) ! Flux of ice - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQLNG(:,:) ! -ve corr for liq - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQNNG(:,:) ! -ve corr for ice - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQRF(:,:) ! Flux diagnostics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQSF(:,:) ! for DDH, generic - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQRNG(:,:) ! rain - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQSNG(:,:) ! snow - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLTUR(:,:) ! liquid flux due to VDF - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQITUR(:,:) ! ice flux due to VDF - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFPLSL(:,:) ! liq+rain sedim flux - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFPLSN(:,:) ! ice+snow sedim flux - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSL(:,:) ! Enthalpy flux for liq - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSN(:,:) ! Enthalpy flux for ice - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCOVPTOT(:,:) ! Precip fraction - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRAINFRAC_TOPRFZ(:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQLF(:,:) ! Flux of liquid + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQIF(:,:) ! Flux of ice + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQLNG(:,:) ! -ve corr for liq + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQNNG(:,:) ! -ve corr for ice + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQRF(:,:) ! Flux diagnostics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQSF(:,:) ! for DDH, generic + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQRNG(:,:) ! rain + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQSNG(:,:) ! snow + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQLTUR(:,:) ! liquid flux due to VDF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQITUR(:,:) ! ice flux due to VDF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFPLSL(:,:) ! liq+rain sedim flux + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFPLSN(:,:) ! ice+snow sedim flux + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFHPSL(:,:) ! Enthalpy flux for liq + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFHPSN(:,:) ! Enthalpy flux for ice + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCOVPTOT(:,:) ! Precip fraction + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PRAINFRAC_TOPRFZ(:) CONTAINS PROCEDURE :: GET_BLOCK => CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK @@ -132,7 +132,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CLASS(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER, INTENT(IN) :: IBLK - REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) + REAL(KIND=JPRB), POINTER :: TMP3D(:,:,:) CALL FSET%DATA(1, SELF%PLCRIT_AER, IBLK) CALL FSET%DATA(2, SELF%PICRIT_AER, IBLK) @@ -210,8 +210,8 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, NPROMA, NGPTOTG) INTEGER(KIND=JPIM) :: KLON, IVAR, B TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 - REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) - REAL(C_DOUBLE), POINTER :: TMP2D(:,:) + REAL(KIND=JPRB), POINTER :: TMP3D(:,:,:) + REAL(KIND=JPRB), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD TYPE(ATLAS_PARTITIONER) :: PARTITIONER INTEGER(KIND=JPIM) :: NGPTOT diff --git a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 index dd8470cb..83fbf103 100644 --- a/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 +++ b/src/cloudsc_fortran_atlas/dwarf_cloudsc_atlas.F90 @@ -27,7 +27,6 @@ PROGRAM DWARF_CLOUDSC INTEGER(KIND=JPIM) :: NGPTOTG = 16384 ! Number of grid points (as read from command line) INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active) -REAL(c_double), pointer :: tmp3d(:,:,:) type(atlas_fieldset) :: fset type(atlas_field) :: field diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 9a9f8e6f..34f5b1b9 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -44,7 +44,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) CHARACTER(*), INTENT(IN), OPTIONAL :: STATE_VAR REAL(KIND=JPRB), ALLOCATABLE :: REF_R2(:,:), REF_R3(:,:,:), REF_R4(:,:,:,:) - REAL(C_DOUBLE), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) + REAL(KIND=JPRB), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD INTEGER :: B, BSIZE, JL, JK, JM From 4f56faaebcb494b8b30c0d8ce0ba67abb4a0688e Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 15:19:46 +0000 Subject: [PATCH 098/174] fixed for single precision in Atlas structures --- .../cloudsc_global_atlas_state_mod.F90 | 99 +++++++++---------- .../validate_atlas_mod.F90 | 3 +- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index ff81743c..198ea5c4 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -29,10 +29,10 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD IMPLICIT NONE TYPE VAR3D_PTR - REAL(C_DOUBLE), POINTER :: PTR(:,:,:) + REAL(KIND=JPRB), POINTER :: PTR(:,:,:) END TYPE TYPE VAR2D_PTR - REAL(C_DOUBLE), POINTER :: PTR(:,:) + REAL(KIND=JPRB), POINTER :: PTR(:,:) END TYPE CHARACTER(LEN=10), PARAMETER, DIMENSION(30) :: IN_VAR_NAMES = (/ & @@ -52,59 +52,58 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD TYPE(VAR2D_PTR), DIMENSION(15) :: OUT_VARS_2D_REAL64 ! Input field variables and tendencies - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLCRIT_AER(:,:) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PICRIT_AER(:,:) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRE_ICE(:,:) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCCN(:,:) ! liquid cloud condensation nuclei - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PNICE(:,:) ! ice number concentration (cf. CCN) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PT(:,:) ! T at start of callpar - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PQ(:,:) ! Q at start of callpar - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFA(:,:) ! CC from VDF scheme - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFL(:,:) ! Liq from VDF scheme - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVFI(:,:) ! Ice from VDF scheme - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNA(:,:) ! CC from Dynamics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNL(:,:) ! Liq from Dynamics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PDYNI(:,:) ! Liq from Dynamics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PHRSW(:,:) ! Short-wave heating rate - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PHRLW(:,:) ! Long-wave heating rate - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PVERVEL(:,:) ! Vertical velocity - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAP(:,:) ! Pressure on full levels - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLU(:,:) ! Conv. condensate - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLUDE(:,:) ! Conv. detrained water - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PSNDE(:,:) ! Conv. detrained snow - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PMFU(:,:) ! Conv. mass flux up - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PMFD(:,:) ! Conv. mass flux down - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PA(:,:) ! Original Cloud fraction (t) - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PSUPSAT(:,:) - - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PLSM(:) ! Land fraction (0-1) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLCRIT_AER(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PICRIT_AER(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PRE_ICE(:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCCN(:,:) ! liquid cloud condensation nuclei + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PNICE(:,:) ! ice number concentration (cf. CCN) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PT(:,:) ! T at start of callpar + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PQ(:,:) ! Q at start of callpar + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVFA(:,:) ! CC from VDF scheme + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVFL(:,:) ! Liq from VDF scheme + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVFI(:,:) ! Ice from VDF scheme + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PDYNA(:,:) ! CC from Dynamics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PDYNL(:,:) ! Liq from Dynamics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PDYNI(:,:) ! Liq from Dynamics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PHRSW(:,:) ! Short-wave heating rate + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PHRLW(:,:) ! Long-wave heating rate + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PVERVEL(:,:) ! Vertical velocity + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PAP(:,:) ! Pressure on full levels + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLU(:,:) ! Conv. condensate + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLUDE(:,:) ! Conv. detrained water + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PSNDE(:,:) ! Conv. detrained snow + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PMFU(:,:) ! Conv. mass flux up + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PMFD(:,:) ! Conv. mass flux down + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PA(:,:) ! Original Cloud fraction (t) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PSUPSAT(:,:) + + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLSM(:) ! Land fraction (0-1) LOGICAL, POINTER, CONTIGUOUS :: LDCUM(:) ! Convection active INTEGER, POINTER, CONTIGUOUS :: KTYPE(:) ! Convection type 0,1,2 - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PEXTRA(:,:,:) ! extra fields - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCLV(:,:,:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCLV(:,:,:) TYPE(STATE_TYPE) :: TENDENCY_CML ! cumulative tendency used for final output TYPE(STATE_TYPE) :: TENDENCY_TMP ! cumulative tendency used as input TYPE(STATE_TYPE) :: TENDENCY_LOC ! local tendency from cloud scheme ! Output fields used for validation - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLF(:,:) ! Flux of liquid - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQIF(:,:) ! Flux of ice - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQLNG(:,:) ! -ve corr for liq - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQNNG(:,:) ! -ve corr for ice - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQRF(:,:) ! Flux diagnostics - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQSF(:,:) ! for DDH, generic - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQRNG(:,:) ! rain - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFCQSNG(:,:) ! snow - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQLTUR(:,:) ! liquid flux due to VDF - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFSQITUR(:,:) ! ice flux due to VDF - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFPLSL(:,:) ! liq+rain sedim flux - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFPLSN(:,:) ! ice+snow sedim flux - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSL(:,:) ! Enthalpy flux for liq - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PFHPSN(:,:) ! Enthalpy flux for ice - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PCOVPTOT(:,:) ! Precip fraction - REAL(C_DOUBLE), POINTER, CONTIGUOUS :: PRAINFRAC_TOPRFZ(:) + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQLF(:,:) ! Flux of liquid + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQIF(:,:) ! Flux of ice + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQLNG(:,:) ! -ve corr for liq + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQNNG(:,:) ! -ve corr for ice + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQRF(:,:) ! Flux diagnostics + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQSF(:,:) ! for DDH, generic + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQRNG(:,:) ! rain + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFCQSNG(:,:) ! snow + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQLTUR(:,:) ! liquid flux due to VDF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFSQITUR(:,:) ! ice flux due to VDF + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFPLSL(:,:) ! liq+rain sedim flux + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFPLSN(:,:) ! ice+snow sedim flux + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFHPSL(:,:) ! Enthalpy flux for liq + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PFHPSN(:,:) ! Enthalpy flux for ice + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCOVPTOT(:,:) ! Precip fraction + REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PRAINFRAC_TOPRFZ(:) CONTAINS PROCEDURE :: GET_BLOCK => CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK @@ -134,7 +133,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) TYPE(ATLAS_FIELDSET), INTENT(INOUT) :: FSET INTEGER, INTENT(IN) :: IBLK - REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) + REAL(KIND=JPRB), POINTER :: TMP3D(:,:,:) ! CALL FSET%UPDATE_DEVICE(1,IBLK) ! field = fset%field(1) @@ -217,8 +216,8 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) INTEGER(KIND=JPIM) :: KLON, IVAR, B TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 - REAL(C_DOUBLE), POINTER :: TMP3D(:,:,:) - REAL(C_DOUBLE), POINTER :: TMP2D(:,:) + REAL(KIND=JPRB), POINTER :: TMP3D(:,:,:) + REAL(KIND=JPRB), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD TYPE(ATLAS_CONFIG) :: CONFIG TYPE(ATLAS_CONFIG), DIMENSION(24) :: IN_MFIELD_CONFIG diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 2308f582..20b7888b 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -46,7 +46,8 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) CHARACTER(*), INTENT(IN), OPTIONAL :: STATE_VAR REAL(KIND=JPRB), ALLOCATABLE :: REF_R2(:,:), REF_R3(:,:,:), REF_R4(:,:,:,:) - REAL(C_DOUBLE), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) + REAL(KIND=JPRB), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) + TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD INTEGER :: B, BSIZE, JL, JK, JM REAL(KIND=JPRB) :: ZMINVAL(1), ZMAX_VAL_ERR(2), ZDIFF, ZSUM_ERR_ABS(2), ZRELERR, ZAVGPGP From ea6d3747f4acd5bdbb9abd2b9c8dfd2da39a05f5 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 14:54:39 +0000 Subject: [PATCH 099/174] support single precision in expand_atlas_mod --- src/cloudsc_fortran_atlas/expand_atlas_mod.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 index 3253de1e..84e7346f 100644 --- a/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/expand_atlas_mod.F90 @@ -19,7 +19,7 @@ module expand_atlas_mod use file_io_mod, only: input_initialize, load_scalar, load_array use expand_mod, only: get_offsets, expand - use, intrinsic :: iso_c_binding, only : c_int, c_double + use, intrinsic :: iso_c_binding, only : c_int implicit none @@ -38,7 +38,7 @@ subroutine loadvar_atlas(fset, fspace, name, nlon, ngptotg) real(kind=jprb), allocatable :: buffer_r1(:), buffer_r2(:,:), buffer_r3(:,:,:) integer(kind=jpim), allocatable :: buffer_i1(:) logical, allocatable :: buffer_l1(:) - real(c_double), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) + real(kind=jprb), pointer :: field_r1(:,:), field_r2(:,:,:), field_r3(:,:,:,:) integer(c_int), pointer :: field_i1(:,:) logical, pointer :: field_l1(:,:) logical :: lfield, rfield, ifield @@ -114,7 +114,7 @@ subroutine loadstate_atlas(fset, name, nlon, ngptotg) type(atlas_trace) :: trace real(kind=jprb), allocatable :: buffer(:,:,:) - real(c_double), pointer :: field_r3(:,:,:,:) + real(kind=jprb), pointer :: field_r3(:,:,:,:) trace = atlas_trace("expand_atlas_mod.F90", __LINE__, "loadstate_atlas", "IO") From 3199964b72599156676e84023f575a3828eefdd8 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 16:09:36 +0000 Subject: [PATCH 100/174] make atlas batching with MultiField work for SerialBox --- .../cloudsc_driver_mod.F90 | 4 +- .../cloudsc_global_atlas_state_mod.F90 | 64 +++++++++---------- .../validate_atlas_mod.F90 | 1 - 3 files changed, 31 insertions(+), 38 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 index bea46abd..a3ac3e54 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_driver_mod.F90 @@ -56,9 +56,9 @@ SUBROUTINE CLOUDSC_DRIVER(FSET, NUMOMP, NGPTOTG, KFLDX, PTSPHY) POWER_TOTAL = 0_JPIB POWER_COUNT = 0_JPIB - FIELD = FSET%FIELD("PEXTRA") + FIELD = FSET%FIELD("PCLV") FSPACE = FIELD%FUNCTIONSPACE() - NPROMA = FSPACE%NPROMA() + NPROMA = FIELD%SHAPE(1) NLEV = FSPACE%LEVELS() NGPTOT = FSPACE%SIZE() diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 198ea5c4..aa19e0e3 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -35,12 +35,12 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(KIND=JPRB), POINTER :: PTR(:,:) END TYPE - CHARACTER(LEN=10), PARAMETER, DIMENSION(30) :: IN_VAR_NAMES = (/ & + CHARACTER(LEN=10), PARAMETER, DIMENSION(29) :: IN_VAR_NAMES = (/ & "PLCRIT_AER", "PICRIT_AER", "PRE_ICE ", "PCCN ", "PNICE ", "PT ", "PQ ", & "PVFA ", "PVFL ", "PVFI ", "PDYNA ", "PDYNL ", "PDYNI ", "PHRSW ", & "PHRLW ", "PVERVEL ", "PAP ", "PLU ", "PLUDE ", "PSNDE ", "PMFU ", & "PMFD ", "PA ", "PSUPSAT ", & - "PLSM ", "LDCUM ", "KTYPE ", "PAPH ", "PEXTRA ", "PCLV " /) + "PLSM ", "LDCUM ", "KTYPE ", "PAPH ", "PCLV " /) CHARACTER(LEN=16), PARAMETER, DIMENSION(16) :: OUT_VAR_NAMES = (/ & "PFSQLF ", "PFSQIF ", "PFCQLNG ", "PFCQNNG ", "PFSQRF ", & "PFSQSF ", "PFCQRNG ", "PFCQSNG ", "PFSQLTUR ", "PFSQITUR ", & @@ -48,9 +48,6 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD "PRAINFRAC_TOPRFZ" /) TYPE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK_VIEW - TYPE(VAR2D_PTR), DIMENSION(24) :: IN_VARS_2D_REAL64 - TYPE(VAR2D_PTR), DIMENSION(15) :: OUT_VARS_2D_REAL64 - ! Input field variables and tendencies REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLCRIT_AER(:,:) REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PICRIT_AER(:,:) @@ -78,8 +75,8 @@ MODULE CLOUDSC_GLOBAL_ATLAS_STATE_MOD REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PSUPSAT(:,:) REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PLSM(:) ! Land fraction (0-1) - LOGICAL, POINTER, CONTIGUOUS :: LDCUM(:) ! Convection active - INTEGER, POINTER, CONTIGUOUS :: KTYPE(:) ! Convection type 0,1,2 + LOGICAL, POINTER, CONTIGUOUS :: LDCUM(:) ! Convection active + INTEGER, POINTER, CONTIGUOUS :: KTYPE(:) ! Convection type 0,1,2 REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PAPH(:,:) ! Pressure on half levels REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PCLV(:,:,:) @@ -167,41 +164,40 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) CALL FSET%DATA(26, SELF%LDCUM, IBLK) CALL FSET%DATA(27, SELF%KTYPE, IBLK) CALL FSET%DATA(28, SELF%PAPH, IBLK) - CALL FSET%DATA(29, SELF%PEXTRA, IBLK) - CALL FSET%DATA(30, SELF%PCLV, IBLK) + CALL FSET%DATA(29, SELF%PCLV, IBLK) - CALL FSET%DATA(31, TMP3D, IBLK) + CALL FSET%DATA(30, TMP3D, IBLK) SELF%TENDENCY_CML%T => TMP3D(:,:,1) SELF%TENDENCY_CML%A => TMP3D(:,:,2) SELF%TENDENCY_CML%Q => TMP3D(:,:,3) SELF%TENDENCY_CML%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(32, TMP3D, IBLK) + CALL FSET%DATA(31, TMP3D, IBLK) SELF%TENDENCY_TMP%T => TMP3D(:,:,1) SELF%TENDENCY_TMP%A => TMP3D(:,:,2) SELF%TENDENCY_TMP%Q => TMP3D(:,:,3) SELF%TENDENCY_TMP%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(33, TMP3D, IBLK) + CALL FSET%DATA(32, TMP3D, IBLK) SELF%TENDENCY_LOC%T => TMP3D(:,:,1) SELF%TENDENCY_LOC%A => TMP3D(:,:,2) SELF%TENDENCY_LOC%Q => TMP3D(:,:,3) SELF%TENDENCY_LOC%CLD => TMP3D(:,:,4:) - CALL FSET%DATA(34, SELF%PFSQLF, IBLK) - CALL FSET%DATA(35, SELF%PFSQIF, IBLK) - CALL FSET%DATA(36, SELF%PFCQLNG, IBLK) - CALL FSET%DATA(37, SELF%PFCQNNG, IBLK) - CALL FSET%DATA(38, SELF%PFSQRF, IBLK) - CALL FSET%DATA(39, SELF%PFSQSF, IBLK) - CALL FSET%DATA(40, SELF%PFCQRNG, IBLK) - CALL FSET%DATA(41, SELF%PFCQSNG, IBLK) - CALL FSET%DATA(42, SELF%PFSQLTUR, IBLK) - CALL FSET%DATA(43, SELF%PFSQITUR, IBLK) - CALL FSET%DATA(44, SELF%PFPLSL, IBLK) - CALL FSET%DATA(45, SELF%PFPLSN, IBLK) - CALL FSET%DATA(46, SELF%PFHPSL, IBLK) - CALL FSET%DATA(47, SELF%PFHPSN, IBLK) - CALL FSET%DATA(48, SELF%PCOVPTOT, IBLK) - CALL FSET%DATA(49, SELF%PRAINFRAC_TOPRFZ, IBLK) + CALL FSET%DATA(33, SELF%PFSQLF, IBLK) + CALL FSET%DATA(34, SELF%PFSQIF, IBLK) + CALL FSET%DATA(35, SELF%PFCQLNG, IBLK) + CALL FSET%DATA(36, SELF%PFCQNNG, IBLK) + CALL FSET%DATA(37, SELF%PFSQRF, IBLK) + CALL FSET%DATA(38, SELF%PFSQSF, IBLK) + CALL FSET%DATA(39, SELF%PFCQRNG, IBLK) + CALL FSET%DATA(40, SELF%PFCQSNG, IBLK) + CALL FSET%DATA(41, SELF%PFSQLTUR, IBLK) + CALL FSET%DATA(42, SELF%PFSQITUR, IBLK) + CALL FSET%DATA(43, SELF%PFPLSL, IBLK) + CALL FSET%DATA(44, SELF%PFPLSN, IBLK) + CALL FSET%DATA(45, SELF%PFHPSL, IBLK) + CALL FSET%DATA(46, SELF%PFHPSN, IBLK) + CALL FSET%DATA(47, SELF%PCOVPTOT, IBLK) + CALL FSET%DATA(48, SELF%PRAINFRAC_TOPRFZ, IBLK) END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) @@ -214,14 +210,12 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) TYPE(ATLAS_STRUCTUREDGRID) :: GRID TYPE(ATLAS_MULTIFIELD) :: MULTIFIELD_IN, MULTIFIELD_OUT INTEGER(KIND=JPIM) :: KLON, IVAR, B - TYPE(VAR3D_PTR), DIMENSION(24) :: IN_VARS_3D_REAL64 - TYPE(VAR3D_PTR), DIMENSION(15) :: OUT_VARS_3D_REAL64 REAL(KIND=JPRB), POINTER :: TMP3D(:,:,:) REAL(KIND=JPRB), POINTER :: TMP2D(:,:) TYPE(ATLAS_FIELD) :: FIELD TYPE(ATLAS_CONFIG) :: CONFIG - TYPE(ATLAS_CONFIG), DIMENSION(24) :: IN_MFIELD_CONFIG - TYPE(ATLAS_CONFIG), DIMENSION(14) :: OUT_MFIELD_CONFIG + TYPE(ATLAS_CONFIG), DIMENSION(24) :: IN_MFIELD_CONFIG ! the last five variables are special and added through FieldSet + TYPE(ATLAS_CONFIG), DIMENSION(14) :: OUT_MFIELD_CONFIG ! the last two variables are special and added through FieldSet INTEGER :: INVAR_SIZE, OUTVAR_SIZE LOGICAL :: LMULTIFIELD CHARACTER(len=8) :: CENV @@ -260,7 +254,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) CALL CONFIG%SET("ngptot", FSPACE%SIZE()) CALL CONFIG%SET("datatype", "real64") - INVAR_SIZE = SIZE(IN_VAR_NAMES) - 6 ! the last six variables are special and added through FieldSet + INVAR_SIZE = SIZE(IN_VAR_NAMES) - 5 IF (LMULTIFIELD) THEN ! input multifield on model levels, i.e. LEVELS = FSPACE%LEVELS() = SELF%KLEV DO IVAR = 1, INVAR_SIZE @@ -286,14 +280,14 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="LDCUM", KIND=ATLAS_LOGICAL(), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="KTYPE", KIND=ATLAS_INTEGER(JPIM), LEVELS=0)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PAPH", KIND=ATLAS_REAL(JPRB), LEVELS=SELF%KLEV+1)) - CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,SELF%KFLDX))) + !CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PEXTRA", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,SELF%KFLDX))) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME="PCLV", KIND=ATLAS_REAL(JPRB), VARIABLES=MAX(1,NCLV))) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_CML', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_TMP', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME='TENDENCY_LOC', KIND=ATLAS_REAL(JPRB), VARIABLES=3+NCLV)) - OUTVAR_SIZE = SIZE(OUT_VAR_NAMES) - 2 ! the last two variables are special and added through FieldSet + OUTVAR_SIZE = SIZE(OUT_VAR_NAMES) - 2 IF (LMULTIFIELD) THEN ! output multifield on model interfaces, i.e. LEVELS = FSPACE%LEVELS() + 1 = SELF%KLEV + 1 DO IVAR = 1, OUTVAR_SIZE diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 20b7888b..eb5bf38f 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -47,7 +47,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) REAL(KIND=JPRB), ALLOCATABLE :: REF_R2(:,:), REF_R3(:,:,:), REF_R4(:,:,:,:) REAL(KIND=JPRB), POINTER :: FIELD_R1(:,:), FIELD_R2(:,:,:), FIELD_R3(:,:,:,:) - TYPE(ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS) :: FSPACE TYPE(ATLAS_FIELD) :: FIELD INTEGER :: B, BSIZE, JL, JK, JM REAL(KIND=JPRB) :: ZMINVAL(1), ZMAX_VAL_ERR(2), ZDIFF, ZSUM_ERR_ABS(2), ZRELERR, ZAVGPGP From 69501b774dbd1c441b355cee074a2be6874d859a Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 26 Sep 2023 16:16:59 +0000 Subject: [PATCH 101/174] ensure correct datatype in MultiField for the single precision --- src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index aa19e0e3..2955c4e5 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -218,7 +218,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) TYPE(ATLAS_CONFIG), DIMENSION(14) :: OUT_MFIELD_CONFIG ! the last two variables are special and added through FieldSet INTEGER :: INVAR_SIZE, OUTVAR_SIZE LOGICAL :: LMULTIFIELD - CHARACTER(len=8) :: CENV + CHARACTER(len=8) :: CENV, FPREC INTEGER :: CENV_LEN TYPE(ATLAS_TRACE) :: TRACE, TRACE_IO @@ -252,7 +252,8 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) CALL CONFIG%SET("type", "MultiFieldCreatorIFS") CALL CONFIG%SET("nproma", NPROMA) CALL CONFIG%SET("ngptot", FSPACE%SIZE()) - CALL CONFIG%SET("datatype", "real64") + WRITE (FPREC,"(A4,I2)") "real", 8*jprb + CALL CONFIG%SET("datatype", TRIM(FPREC)) INVAR_SIZE = SIZE(IN_VAR_NAMES) - 5 IF (LMULTIFIELD) THEN From 71f6b327b3549cf38d5834c84a01cf51ff0ce669 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 27 Sep 2023 13:13:22 +0100 Subject: [PATCH 102/174] Disable MPI autodiscovery for NVHPC@Github runners --- arch/toolchains/github-ubuntu-nvhpc.cmake | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/toolchains/github-ubuntu-nvhpc.cmake b/arch/toolchains/github-ubuntu-nvhpc.cmake index deda8b6f..778f6f35 100644 --- a/arch/toolchains/github-ubuntu-nvhpc.cmake +++ b/arch/toolchains/github-ubuntu-nvhpc.cmake @@ -6,12 +6,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) - #################################################################### # OpenMP FLAGS #################################################################### @@ -22,7 +16,7 @@ set( OpenMP_Fortran_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) #################################################################### -# OpenAcc FLAGS +# OpenACC FLAGS #################################################################### # NB: We have to add `-mp` again to avoid undefined symbols during linking From ee502b58381a0242193e59bce11ff7d7eca8063e Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 27 Sep 2023 13:22:28 +0000 Subject: [PATCH 103/174] another attempt at fixing SerialBox for Atlas structure --- src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 | 2 +- src/cloudsc_fortran_atlas/validate_atlas_mod.F90 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 2955c4e5..e6f83ff0 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -371,7 +371,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) CALL INPUT_INITIALIZE(NAME='reference') CALL LOAD_SCALAR('KLON', KLON) print *, "KLON = ", KLON - CALL INPUT_FINALIZE() ! Write variable validation header IF (IRANK == 0) THEN @@ -386,6 +385,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, FSPACE, NGPTOTG) CALL VALIDATEVAR_ATLAS(FSET, FSPACE, TRIM(OUT_VAR_NAMES(IVAR)), KLON, NGPTOTG) ENDDO CALL VALIDATESTATE_ATLAS(FSET, FSPACE, 'TENDENCY_LOC', KLON, NGPTOTG) + CALL INPUT_FINALIZE() CALL TRACE%FINAL() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index eb5bf38f..4a28c135 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -71,7 +71,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) ZMAX_VAL_ERR(2) = 0.0_JPRB ZSUM_ERR_ABS(:) = 0.0_JPRB - CALL INPUT_INITIALIZE(NAME='reference') + !CALL INPUT_INITIALIZE(NAME='reference') IF (FRANK == 2) THEN CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R1) @@ -161,7 +161,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) PRINT *, "FIELD RANK NOT SUPPORTED" CALL EXIT(1) ENDIF - CALL INPUT_FINALIZE() + !CALL INPUT_FINALIZE() CALL CLOUDSC_MPI_REDUCE_MIN(ZMINVAL, 1, 0) CALL CLOUDSC_MPI_REDUCE_MAX(ZMAX_VAL_ERR, 2, 0) From 0051b48dad800df987e24ece5608ed7d479e2c0b Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 27 Sep 2023 14:10:24 +0000 Subject: [PATCH 104/174] fix SerialBox with Atlas implementation - SBox does not like too many opening and closing of reference input --- src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 | 2 +- src/cloudsc_fortran_atlas/validate_atlas_mod.F90 | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index 527a57a1..baa3a9ed 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -298,7 +298,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOTG) CALL INPUT_INITIALIZE(NAME='reference') CALL LOAD_SCALAR('KLON', KLON) print *, "KLON = ", KLON - CALL INPUT_FINALIZE() ! Write variable validation header IF (IRANK == 0) THEN @@ -313,6 +312,7 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE(SELF, FSET, NGPTOTG) CALL VALIDATEVAR_ATLAS(FSET, OUT_VAR_NAMES(IVAR), KLON, NGPTOTG) ENDDO CALL VALIDATESTATE_ATLAS(FSET, 'TENDENCY_LOC', KLON, NGPTOTG) + CALL INPUT_FINALIZE() END SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_VALIDATE diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 34f5b1b9..0a821c32 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -17,7 +17,6 @@ MODULE VALIDATE_ATLAS_MOD USE ATLAS_FUNCTIONSPACE_BLOCKSTRUCTUREDCOLUMNS_MODULE USE, INTRINSIC :: ISO_C_BINDING USE EXPAND_MOD, ONLY: LOAD_AND_EXPAND - USE FILE_IO_MOD, ONLY: INPUT_INITIALIZE, INPUT_FINALIZE IMPLICIT NONE @@ -72,7 +71,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) ZMAX_VAL_ERR(2) = 0.0_JPRB ZSUM_ERR_ABS(:) = 0.0_JPRB - CALL INPUT_INITIALIZE(NAME='reference') IF (FRANK == 2) THEN CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R1) @@ -162,7 +160,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, NAME, NLON, NGPTOTG, STATE_VAR) PRINT *, "FIELD RANK NOT SUPPORTED" CALL EXIT(1) ENDIF - CALL INPUT_FINALIZE() CALL CLOUDSC_MPI_REDUCE_MIN(ZMINVAL, 1, 0) CALL CLOUDSC_MPI_REDUCE_MAX(ZMAX_VAL_ERR, 2, 0) From d61e2524b88c1db75d8caa9d3d2ccc8bd8fc4413 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 27 Sep 2023 16:00:54 +0100 Subject: [PATCH 105/174] Disable MPI with NVHPC on Github (became default ON due to Atlas default) --- arch/toolchains/github-ubuntu-nvhpc.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/toolchains/github-ubuntu-nvhpc.cmake b/arch/toolchains/github-ubuntu-nvhpc.cmake index 778f6f35..f0b88671 100644 --- a/arch/toolchains/github-ubuntu-nvhpc.cmake +++ b/arch/toolchains/github-ubuntu-nvhpc.cmake @@ -6,6 +6,9 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. +# Disable MPI in Github runner with NVHPC +set( ENABLE_MPI OFF CACHE STRING "" ) + #################################################################### # OpenMP FLAGS #################################################################### From f453604963b88ab2327b693ba7e74e68954fb37c Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Thu, 28 Sep 2023 10:07:32 +0100 Subject: [PATCH 106/174] Test different CUDA runtime export --- arch/github/ubuntu/nvhpc/21.9/env.sh | 5 +++-- arch/github/ubuntu/nvhpc/23.5/env.sh | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/github/ubuntu/nvhpc/21.9/env.sh b/arch/github/ubuntu/nvhpc/21.9/env.sh index 6e05f756..44442acd 100644 --- a/arch/github/ubuntu/nvhpc/21.9/env.sh +++ b/arch/github/ubuntu/nvhpc/21.9/env.sh @@ -11,7 +11,6 @@ ### Variables export NVHPC_INSTALL_DIR=${GITHUB_WORKSPACE}/nvhpc-install export NVHPC_VERSION=21.9 -export CUDA_VERSION=11.4 export NVHPC_DIR=${NVHPC_INSTALL_DIR}/Linux_x86_64/${NVHPC_VERSION} ### Compilers @@ -19,10 +18,12 @@ export PATH=${NVHPC_DIR}/compilers/bin:${PATH} export NVHPC_LIBRARY_PATH=${NVHPC_DIR}/compilers/lib export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH}:${LD_LIBRARY_PATH} +### CUDA runtime +export LD_LIBRARY_PATH=${NVHPC_DIR}/cuda/lib64/stubs:${NVHPC_DIR}/cuda/lib64:${LD_LIBRARY_PATH} + ### MPI export MPI_HOME=${NVHPC_DIR}/comm_libs/mpi export PATH=${MPI_HOME}/bin:${PATH} -export LD_LIBRARY_PATH=${NVHPC_DIR}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib/stubs:${LD_LIBRARY_PATH} ### HDF5 export HDF5_DIR=${GITHUB_WORKSPACE}/hdf5-install diff --git a/arch/github/ubuntu/nvhpc/23.5/env.sh b/arch/github/ubuntu/nvhpc/23.5/env.sh index a2261c25..1ef451c7 100644 --- a/arch/github/ubuntu/nvhpc/23.5/env.sh +++ b/arch/github/ubuntu/nvhpc/23.5/env.sh @@ -11,7 +11,6 @@ ### Variables export NVHPC_INSTALL_DIR=${GITHUB_WORKSPACE}/nvhpc-install export NVHPC_VERSION=23.5 -export CUDA_VERSION=12.1 export NVHPC_DIR=${NVHPC_INSTALL_DIR}/Linux_x86_64/${NVHPC_VERSION} ### Compilers @@ -19,10 +18,12 @@ export PATH=${NVHPC_DIR}/compilers/bin:${PATH} export NVHPC_LIBRARY_PATH=${NVHPC_DIR}/compilers/lib export LD_LIBRARY_PATH=${NVHPC_LIBRARY_PATH}:${LD_LIBRARY_PATH} +### CUDA runtime +export LD_LIBRARY_PATH=${NVHPC_DIR}/cuda/lib64/stubs:${NVHPC_DIR}/cuda/lib64:${LD_LIBRARY_PATH} + ### MPI export MPI_HOME=${NVHPC_DIR}/comm_libs/mpi export PATH=${MPI_HOME}/bin:${PATH} -export LD_LIBRARY_PATH=${NVHPC_DIR}/cuda/${CUDA_VERSION}/targets/x86_64-linux/lib/stubs:${LD_LIBRARY_PATH} ### HDF5 export HDF5_DIR=${GITHUB_WORKSPACE}/hdf5-install From dab193c935300f40e924fdf75e6d197de7d79447 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Thu, 28 Sep 2023 14:18:36 +0100 Subject: [PATCH 107/174] Do not fail on free-disk-space --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31f74c08..4df1df3e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -101,6 +101,7 @@ jobs: - name: Free Disk Space (Ubuntu) uses: jlumbroso/free-disk-space@main if: contains( matrix.arch, 'nvhpc' ) + continue-on-error: true with: # this might remove tools that are actually needed, # if set to "true" but frees about 6 GB From d6a8d5e8b79c3671971bc9291be51539724eb833 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 29 Sep 2023 16:12:55 +0100 Subject: [PATCH 108/174] Do not run GPU/CUDA variants in CI --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4df1df3e..998fea59 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,8 +53,8 @@ jobs: - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' - build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + build_flags: '--with-loki --with-atlas' + ctest_exclude_pattern: '-loki-c' # loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' @@ -69,8 +69,8 @@ jobs: - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' - build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + build_flags: '--with-loki --with-atlas' + ctest_exclude_pattern: '-loki-c' # loki-c variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' @@ -159,7 +159,7 @@ jobs: # Run ctest - name: Run CTest - if: ${{ !contains(matrix.build_flags, '--single-precision') }} + if: ${{ !( contains(matrix.build_flags, '--single-precision') || contains(matrix.build_flags, '--with-cuda') || contains(matrix.build_flags, '--with-gpu') ) }} working-directory: ./build run: | source env.sh From 3eeb926c6d4358df334776df82c9891a7710baf4 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Mon, 20 Nov 2023 11:54:07 +0000 Subject: [PATCH 109/174] Loki: Update to pure list-based routine/dimension config entries --- src/cloudsc_loki/cloudsc_cuf_loki.config | 40 +++++++++++---------- src/cloudsc_loki/cloudsc_loki.config | 44 +++++++++++++----------- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/src/cloudsc_loki/cloudsc_cuf_loki.config b/src/cloudsc_loki/cloudsc_cuf_loki.config index e3addcad..5a37f42e 100644 --- a/src/cloudsc_loki/cloudsc_cuf_loki.config +++ b/src/cloudsc_loki/cloudsc_cuf_loki.config @@ -18,25 +18,27 @@ disable = ['timer%start', 'timer%end', 'timer%thread_start', 'timer%thread_end', 'performance_timer%thread_log', 'performance_timer%print_performance'] # Define entry point for call-tree transformation -[[routine]] -name = 'cuf_cloudsc_driver' +[routines] + +[routines.cuf_cloudsc_driver] role = 'driver' expand = true -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' -aliases = ['JKGLO'] + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' + aliases = ['JKGLO'] diff --git a/src/cloudsc_loki/cloudsc_loki.config b/src/cloudsc_loki/cloudsc_loki.config index d6fda309..6eab8577 100644 --- a/src/cloudsc_loki/cloudsc_loki.config +++ b/src/cloudsc_loki/cloudsc_loki.config @@ -12,24 +12,26 @@ disable = ['performance_timer%start', 'performance_timer%end', 'performance_time 'performance_timer%thread_log', 'performance_timer%print_performance'] # Define entry point for call-tree transformation -[[routine]] -name = 'cloudsc_driver' -role = 'driver' -expand = true - -[[dimension]] -name = 'horizontal' -size = 'KLON' -index = 'JL' -bounds = ['KIDIA', 'KFDIA'] -aliases = ['NPROMA', 'KDIM%KLON'] - -[[dimension]] -name = 'vertical' -size = 'KLEV' -index = 'JK' - -[[dimension]] -name = 'block_dim' -size = 'NGPBLKS' -index = 'IBL' +[routines] + +[routines.cloudsc_driver] + role = 'driver' + expand = true + + +# Define indices and bounds for array dimensions +[dimensions] + +[dimensions.horizontal] + size = 'KLON' + index = 'JL' + bounds = ['KIDIA', 'KFDIA'] + aliases = ['NPROMA', 'KDIM%KLON'] + +[dimensions.vertical] + size = 'KLEV' + index = 'JK' + +[dimensions.block_dim] + size = 'NGPBLKS' + index = 'IBL' From cbef64e0840142684246ae1b801f4234f6db9f6c Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Mon, 20 Nov 2023 13:50:31 +0000 Subject: [PATCH 110/174] Loki: Add custom transformation configs for SCC-CUF components As we can now directly configure the setup of individual transformation objest, we no longer need to back-door "dic2p" and "derived_types". This requires some more refactoring once we have better pipelining, but it allows early testing. --- src/cloudsc_loki/cloudsc_cuf_loki.config | 51 +++++++++++++++++++++--- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/src/cloudsc_loki/cloudsc_cuf_loki.config b/src/cloudsc_loki/cloudsc_cuf_loki.config index 5a37f42e..8150ed9e 100644 --- a/src/cloudsc_loki/cloudsc_cuf_loki.config +++ b/src/cloudsc_loki/cloudsc_cuf_loki.config @@ -1,8 +1,3 @@ -derived_types = ['TECLDP'] - -[dic2p] -NLEV = 137 - [default] # Specifies the behaviour of auto-expanded routines role = 'kernel' @@ -42,3 +37,49 @@ expand = true size = 'NGPBLKS' index = 'IBL' aliases = ['JKGLO'] + + +# Define specific transformation settings +[transformations] + +# Loki-SCC-CUF family +# ----------------------------------------- +# For these, we need to explicitly define the "transformation_type" +# and provide the names of derived types for extracting device code. +# +# Please note that these are intended for eventual refactoring! +[transformations.cuf-hoist] + classname = 'SccCufTransformation' + module = 'transformations.scc_cuf' +[transformations.cuf-hoist.options] + transformation_type = 'hoist' + horizontal = '%dimensions.horizontal%' + vertical = '%dimensions.vertical%' + block_dim = '%dimensions.block_dim%' + derived_types = ['TECLDP'] + +[transformations.cuf-dynamic] + classname = 'SccCufTransformation' + module = 'transformations.scc_cuf' +[transformations.cuf-dynamic.options] + transformation_type = 'dynamic' + horizontal = '%dimensions.horizontal%' + vertical = '%dimensions.vertical%' + block_dim = '%dimensions.block_dim%' + derived_types = ['TECLDP'] + +[transformations.cuf-parametrise] + classname = 'SccCufTransformation' + module = 'transformations.scc_cuf' +[transformations.cuf-parametrise.options] + transformation_type = 'parametrise' + horizontal = '%dimensions.horizontal%' + vertical = '%dimensions.vertical%' + block_dim = '%dimensions.block_dim%' + derived_types = ['TECLDP'] + +# For SCC-CUF-parametrise we need to define the +# in-source replacement via "dic2p". +[transformations.ParametriseTransformation] + module = 'loki.transform' + options = { dic2p = {NLEV = 137} } From fe77fb6289b966e6cea72cec89b6ac39c4036591 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 4 Oct 2023 12:01:41 +0100 Subject: [PATCH 111/174] Experimenting with NVHPC build configuration --- .github/workflows/build.yml | 14 +++++++------- arch/toolchains/github-ubuntu-nvhpc.cmake | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 998fea59..e9ad5729 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,8 +53,8 @@ jobs: - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' - build_flags: '--with-loki --with-atlas' - ctest_exclude_pattern: '-loki-c' # loki-c variant causes SIGFPE + build_flags: '--with-gpu --with-loki --with-cuda' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' @@ -69,18 +69,18 @@ jobs: - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' - build_flags: '--with-loki --with-atlas' - ctest_exclude_pattern: '-loki-c' # loki-c variant causes SIGFPE + build_flags: '--with-gpu --with-loki --with-cuda' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' build_flags: '--single-precision --with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '--with-serialbox' build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' - ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE # Steps represent a sequence of tasks that will be executed as part of the job steps: @@ -159,7 +159,7 @@ jobs: # Run ctest - name: Run CTest - if: ${{ !( contains(matrix.build_flags, '--single-precision') || contains(matrix.build_flags, '--with-cuda') || contains(matrix.build_flags, '--with-gpu') ) }} + if: ${{ !( contains(matrix.build_flags, '--single-precision') || (contains(matrix.build_flags, '--with-cuda') && contains(matrix.build_flags, '--with-atlas')) ) }} working-directory: ./build run: | source env.sh diff --git a/arch/toolchains/github-ubuntu-nvhpc.cmake b/arch/toolchains/github-ubuntu-nvhpc.cmake index f0b88671..8e5b0d9f 100644 --- a/arch/toolchains/github-ubuntu-nvhpc.cmake +++ b/arch/toolchains/github-ubuntu-nvhpc.cmake @@ -24,7 +24,7 @@ set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) # NB: We have to add `-mp` again to avoid undefined symbols during linking # (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc -mp" CACHE STRING "" ) +set( OpenACC_Fortran_FLAGS "-acc=host -mp" CACHE STRING "" ) # Enable this to get more detailed compiler output # set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) From e3b8876c63f175fdddbe105a69def8d64ffd128f Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Mon, 4 Dec 2023 17:39:12 +0200 Subject: [PATCH 112/174] improving cmake usage for HIP version, minor changes and refactoring --- CMakeLists.txt | 16 +++++++- .../lumi/cray-gpu/15.0.1/toolchain.cmake | 19 +++++++++- bundle.yml | 3 +- src/cloudsc_hip/CMakeLists.txt | 38 +++++++++---------- src/cloudsc_hip/cloudsc/load_state.cpp | 32 +++++++--------- 5 files changed, 67 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9a454395..2e5726ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,6 +56,20 @@ if( HAVE_CUDA ) enable_language( CUDA ) endif() +ecbuild_add_option( FEATURE HIP + DESCRIPTION "HIP" DEFAULT OFF + REQUIRED_PACKAGES "hip" ) +if ( HAVE_HIP ) + if(NOT DEFINED ROCM_PATH) + if(DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed") + else() + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed") + endif() + endif() + find_package(hip REQUIRED) +endif() + ### OpenMP ecbuild_add_option( FEATURE OMP DESCRIPTION "OpenMP" DEFAULT ON @@ -83,7 +97,7 @@ endif() ecbuild_add_option( FEATURE SERIALBOX DESCRIPTION "Use Serialbox to read input and reference data" REQUIRED_PACKAGES "Serialbox" - CONDITION NOT HAVE_HDF5 OR HAVE_SERIALBOX + CONDITION NOT HAVE_HDF5 DEFAULT OFF ) if( HAVE_SERIALBOX ) list(APPEND CLOUDSC_DEFINITIONS HAVE_SERIALBOX) diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake index 4aed4cdc..e7e382b2 100644 --- a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake @@ -18,8 +18,14 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) #################################################################### set( ENABLE_OMP ON CACHE STRING "" ) -set( OpenMP_C_FLAGS "-homp" CACHE STRING "" ) -set( OpenMP_Fortran_FLAGS "-homp -hnoacc -hlist=aimd -maxrregcount 64" CACHE STRING "" ) +set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-fopenmp -hnoacc -hlist=aimd" CACHE STRING "" ) + +set( OpenMP_C_LIB_NAMES "craymp" ) +set( OpenMP_CXX_LIB_NAMES "craymp" ) +set( OpenMP_Fortran_LIB_NAMES "craymp" ) +set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/15.0.1/cce/x86_64/lib/libcraymp.so" ) #################################################################### # OpenACC FLAGS @@ -30,6 +36,15 @@ set( OpenACC_C_FLAGS "-hacc" ) set( OpenACC_CXX_FLAGS "-hacc" ) set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" ) +#################################################################### +# OpenACC FLAGS +#################################################################### + +set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -03 -ffast-math") +if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_HIP_ARCHITECTURES gfx90a) +endif() + #################################################################### # Compiler FLAGS #################################################################### diff --git a/bundle.yml b/bundle.yml index 77c26c63..5663c0db 100644 --- a/bundle.yml +++ b/bundle.yml @@ -78,10 +78,11 @@ options : ENABLE_CLOUDSC_GPU_SCC_CUF=ON ENABLE_CLOUDSC_GPU_SCC_CUF_K_CACHING=ON BUILD_field_api=ON + - with-hip : help: Enable GPU kernel variant based on HIP cmake: > - ENABLE_CLOUDSC_HIP=ON + ENABLE_HIP=ON - with-mpi : help : Enable MPI-parallel kernel diff --git a/src/cloudsc_hip/CMakeLists.txt b/src/cloudsc_hip/CMakeLists.txt index 768bee1a..0a034585 100644 --- a/src/cloudsc_hip/CMakeLists.txt +++ b/src/cloudsc_hip/CMakeLists.txt @@ -8,26 +8,14 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_HIP - DESCRIPTION "Build the HIP version CLOUDSC using Serialbox" DEFAULT OFF - CONDITION Serialbox_FOUND + DESCRIPTION "Build the HIP version CLOUDSC using Serialbox" DEFAULT ON + CONDITION Serialbox_FOUND AND HAVE_HIP ) if( HAVE_CLOUDSC_HIP ) - - if(NOT DEFINED ROCM_PATH) - if(DEFINED ENV{ROCM_PATH}) - set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCM has been installed") - else() - set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCM has been installed") - endif() - endif() - - find_package(hip REQUIRED) - set(CMAKE_C_COMPILER "${ROCM_PATH}/bin/hipcc") set(CMAKE_CXX_COMPILER "${ROCM_PATH}/bin/hipcc") - # set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -03 -ffast-math") ###### SCC-HIP #### ecbuild_add_library( @@ -57,7 +45,11 @@ if( HAVE_CLOUDSC_HIP ) target_include_directories(dwarf-cloudsc-hip-lib PUBLIC $ $) target_link_libraries(dwarf-cloudsc-hip-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) - target_compile_options(dwarf-cloudsc-hip-lib PRIVATE --offload-arch=gfx90a) + if (NOT DEFINED CMAKE_HIP_ARCHITECTURES) + message(WARNING "No HIP architecture is set! ('CMAKE_HIP_ARCHITECTURES' is not defined)") + else() + target_compile_options(dwarf-cloudsc-hip-lib PRIVATE --offload-arch=${CMAKE_HIP_ARCHITECTURES}) + endif() ecbuild_add_executable( TARGET dwarf-cloudsc-hip @@ -95,8 +87,12 @@ if( HAVE_CLOUDSC_HIP ) target_include_directories(dwarf-cloudsc-hip-hoist-lib PUBLIC $ $) target_link_libraries(dwarf-cloudsc-hip-hoist-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) - target_compile_options(dwarf-cloudsc-hip-hoist-lib PRIVATE --offload-arch=gfx90a) - + if (NOT DEFINED CMAKE_HIP_ARCHITECTURES) + message(WARNING "No HIP architecture is set! ('CMAKE_HIP_ARCHITECTURES' is not defined)") + else() + target_compile_options(dwarf-cloudsc-hip-hoist-lib PRIVATE --offload-arch=${CMAKE_HIP_ARCHITECTURES}) + endif() + ecbuild_add_executable( TARGET dwarf-cloudsc-hip-hoist SOURCES dwarf_cloudsc.cpp @@ -133,8 +129,12 @@ if( HAVE_CLOUDSC_HIP ) target_include_directories(dwarf-cloudsc-hip-k-caching-lib PUBLIC $ $) target_link_libraries(dwarf-cloudsc-hip-k-caching-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) - target_compile_options(dwarf-cloudsc-hip-k-caching-lib PRIVATE --offload-arch=gfx90a) - + if (NOT DEFINED CMAKE_HIP_ARCHITECTURES) + message(WARNING "No HIP architecture is set! ('CMAKE_HIP_ARCHITECTURES' is not defined)") + else() + target_compile_options(dwarf-cloudsc-hip-k-caching-lib PRIVATE --offload-arch=${CMAKE_HIP_ARCHITECTURES}) + endif() + ecbuild_add_executable( TARGET dwarf-cloudsc-hip-k-caching SOURCES dwarf_cloudsc.cpp diff --git a/src/cloudsc_hip/cloudsc/load_state.cpp b/src/cloudsc_hip/cloudsc/load_state.cpp index d8e7e03d..5ba60abf 100644 --- a/src/cloudsc_hip/cloudsc/load_state.cpp +++ b/src/cloudsc_hip/cloudsc/load_state.cpp @@ -23,14 +23,13 @@ void query_state(int *klon, int *klev) void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) { - int b, l, i, buf_start_idx, buf_idx; + int b, i, buf_start_idx, buf_idx; -#pragma omp parallel for default(shared) private(b, l, i, buf_start_idx, buf_idx) +#pragma omp parallel for default(shared) private(b, i, buf_start_idx, buf_idx) for (b = 0; b < nblocks; b++) { buf_start_idx = ((b)*nproma) % nlon; for (i = 0; i < nproma; i++) { buf_idx = (buf_start_idx + i) % nlon; - // field[b][i] = buffer[buf_idx]; field_in[b*nproma+i] = buffer[buf_idx]; } } @@ -39,9 +38,9 @@ void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngpto void expand_1d_int(int *buffer, int *field_in, int nlon, int nproma, int ngptot, int nblocks) { - int b, l, i, buf_start_idx, buf_idx; + int b, i, buf_start_idx, buf_idx; - #pragma omp parallel for default(shared) private(b, l, i, buf_start_idx, buf_idx) + #pragma omp parallel for default(shared) private(b, i, buf_start_idx, buf_idx) for (b = 0; b < nblocks; b++) { buf_start_idx = ((b)*nproma) % nlon; for (i = 0; i < nproma; i++) { @@ -49,7 +48,6 @@ void expand_1d_int(int *buffer, int *field_in, int nlon, int nproma, int ngptot, field_in[b*nproma+i] = buffer[buf_idx]; } } - } @@ -61,13 +59,12 @@ void expand_2d(double *buffer_in, double *field_in, int nlon, int nlev, int npro for (b = 0; b < nblocks; b++) { buf_start_idx = ((b)*nproma) % nlon; for (i = 0; i < nproma; i++) { - for (l = 0; l < nlev; l++) { - buf_idx = (buf_start_idx + i) % nlon; - field_in[b*nlev*nproma+l*nproma+i] = buffer_in[l*nlon+buf_idx]; - } + for (l = 0; l < nlev; l++) { + buf_idx = (buf_start_idx + i) % nlon; + field_in[b*nlev*nproma+l*nproma+i] = buffer_in[l*nlon+buf_idx]; + } } } - } void expand_3d(double *buffer_in, double *field_in, int nlon, int nlev, int nclv, int nproma, int ngptot, int nblocks) @@ -78,15 +75,14 @@ void expand_3d(double *buffer_in, double *field_in, int nlon, int nlev, int nclv for (b = 0; b < nblocks; b++) { buf_start_idx = ((b)*nproma) % nlon; for (i = 0; i < nproma; i++) { - for (c = 0; c < nclv; c++) { - for (l = 0; l < nlev; l++) { - buf_idx = (buf_start_idx + i) % nlon; - field_in[b*nclv*nlev*nproma+c*nlev*nproma+l*nproma+i] = buffer_in[c*nlev*nlon+l*nlon+buf_idx]; - } - } + for (c = 0; c < nclv; c++) { + for (l = 0; l < nlev; l++) { + buf_idx = (buf_start_idx + i) % nlon; + field_in[b*nclv*nlev*nproma+c*nlev*nproma+l*nproma+i] = buffer_in[c*nlev*nlon+l*nlon+buf_idx]; + } + } } } - } From 7a617c779f84c9323c8a4ab4737755463e9da5cd Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Tue, 5 Dec 2023 10:54:11 +0200 Subject: [PATCH 113/174] removing multi-threaded HIP test and adding relevant HIP ctests for all variants --- src/cloudsc_hip/CMakeLists.txt | 39 ++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/cloudsc_hip/CMakeLists.txt b/src/cloudsc_hip/CMakeLists.txt index 0a034585..0f04960e 100644 --- a/src/cloudsc_hip/CMakeLists.txt +++ b/src/cloudsc_hip/CMakeLists.txt @@ -57,6 +57,13 @@ if( HAVE_CLOUDSC_HIP ) LIBS dwarf-cloudsc-hip-lib ) + ecbuild_add_test( + TARGET dwarf-cloudsc-hip-serial + COMMAND bin/dwarf-cloudsc-hip + ARGS 1 1000 64 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) ## ###### SCC-HOIST-HIP #### @@ -99,6 +106,13 @@ if( HAVE_CLOUDSC_HIP ) LIBS dwarf-cloudsc-hip-hoist-lib ) + ecbuild_add_test( + TARGET dwarf-cloudsc-hip-hoist-serial + COMMAND bin/dwarf-cloudsc-hip-hoist + ARGS 1 1000 64 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) ## ###### SCC-K-CACHING-HIP #### @@ -141,28 +155,17 @@ if( HAVE_CLOUDSC_HIP ) LIBS dwarf-cloudsc-hip-k-caching-lib ) + ecbuild_add_test( + TARGET dwarf-cloudsc-hip-k-caching-serial + COMMAND bin/dwarf-cloudsc-hip-k-caching + ARGS 1 1000 64 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) ## - - # Create symlink for the input data execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) - ecbuild_add_test( - TARGET dwarf-cloudsc-hip-serial - COMMAND bin/dwarf-cloudsc-hip - ARGS 1 100 16 - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. - OMP 1 - ) - ecbuild_add_test( - TARGET dwarf-cloudsc-hip-omp - COMMAND bin/dwarf-cloudsc-hip - ARGS 4 100 16 - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. - OMP 4 - CONDITION HAVE_OMP - ) - endif() From cef2017ef28671fd8266143c9e6bebbe4ba26895 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Tue, 5 Dec 2023 12:30:39 +0000 Subject: [PATCH 114/174] Enabling usage of HDF5 for C-style variants (in addition to serialbox), e.g., cloudsc_c --- CMakeLists.txt | 2 +- src/cloudsc_c/CMakeLists.txt | 38 ++- src/cloudsc_c/cloudsc/cloudsc_validate.c | 2 +- src/cloudsc_c/cloudsc/cloudsc_validate.h | 2 +- src/cloudsc_c/cloudsc/load_state.c | 343 ++++++++++++++++++++++- 5 files changed, 373 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 16b2451a..e530bb16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ endif() ### HDF5 ecbuild_add_option( FEATURE HDF5 DESCRIPTION "Use HDF5 to read input and reference data" - REQUIRED_PACKAGES "HDF5 COMPONENTS Fortran" + REQUIRED_PACKAGES "HDF5 COMPONENTS Fortran C" DEFAULT ON ) if( HAVE_HDF5 ) list(APPEND CLOUDSC_DEFINITIONS HAVE_HDF5 ) diff --git a/src/cloudsc_c/CMakeLists.txt b/src/cloudsc_c/CMakeLists.txt index 1a0816a4..45b1e349 100644 --- a/src/cloudsc_c/CMakeLists.txt +++ b/src/cloudsc_c/CMakeLists.txt @@ -9,14 +9,22 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_C DESCRIPTION "Build the C version CLOUDSC using Serialbox" DEFAULT ON - CONDITION Serialbox_FOUND + CONDITION Serialbox_FOUND OR HDF5_FOUND ) if( HAVE_CLOUDSC_C ) + message("HDF5 include dirs: ${HDF5_C_INCLUDE_DIRS}") + message("HDF5 lib: ${HDF5_LIBRARIES}") + message("HDF5 lib c: ${HDF5_C_LIBRARIES}") + message("HDF5 library dirs: ${HDF5_C_LIBRARY_DIRS}") + set( CMAKE_C_STANDARD 11 ) set( CMAKE_C_STANDARD_REQUIRED ON ) + # necessary for AC + link_directories(string (REPLACE ";" " " DEST "${HDF5_C_LIBRARY_DIRS}")>) + ecbuild_add_library( TARGET dwarf-cloudsc-c-lib INSTALL_HEADERS LISTED @@ -37,12 +45,19 @@ if( HAVE_CLOUDSC_C ) cloudsc/cloudsc_validate.c cloudsc/mycpu.h cloudsc/mycpu.c - PUBLIC_INCLUDES + PRIVATE_INCLUDES + # $<${HAVE_HDF5}:${HDF5_C_INCLUDE_DIRS}> # works on LUMI, doesn't work on AC + $<${HAVE_HDF5}:string (REPLACE ";" " " DEST "${HDF5_C_INCLUDE_DIRS}")> # necessar on AC, not on LUMI + PUBLIC_INCLUDES $ $ PUBLIC_LIBS - Serialbox::Serialbox_C - $<${HAVE_OMP}:OpenMP::OpenMP_C> + $<${HAVE_HDF5}:hdf5> + # $<${HAVE_HDF5}:string (REPLACE ";" " " DEST "${HDF5_C_LIBRARIES}")> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> + $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) ecbuild_add_executable( @@ -74,6 +89,17 @@ if( HAVE_CLOUDSC_C ) CONDITION HAVE_OMP ) -else() - ecbuild_info( "Serialbox not found, disabling C prototype" ) +endif() + +# Create symlink for the input data +if( HAVE_SERIALBOX ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) +endif() + +if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) endif() diff --git a/src/cloudsc_c/cloudsc/cloudsc_validate.c b/src/cloudsc_c/cloudsc/cloudsc_validate.c index 1a1d5b85..23e786bc 100644 --- a/src/cloudsc_c/cloudsc/cloudsc_validate.c +++ b/src/cloudsc_c/cloudsc/cloudsc_validate.c @@ -154,7 +154,7 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, } -int cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, +void cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, diff --git a/src/cloudsc_c/cloudsc/cloudsc_validate.h b/src/cloudsc_c/cloudsc/cloudsc_validate.h index e27a1390..0ceecd05 100644 --- a/src/cloudsc_c/cloudsc/cloudsc_validate.h +++ b/src/cloudsc_c/cloudsc/cloudsc_validate.h @@ -13,7 +13,7 @@ #include "load_state.h" -int cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, +void cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, diff --git a/src/cloudsc_c/cloudsc/load_state.c b/src/cloudsc_c/cloudsc/load_state.c index e7c1e519..83d108bb 100644 --- a/src/cloudsc_c/cloudsc/load_state.c +++ b/src/cloudsc_c/cloudsc/load_state.c @@ -11,15 +11,40 @@ #include "load_state.h" #include +#ifdef HAVE_SERIALBOX #include "serialbox-c/Serialbox.h" +#endif +#ifdef HAVE_HDF5 +#include "hdf5.h" +#define INPUT_FILE "input.h5" +#define REFERENCE_FILE "reference.h5" +#endif #define min(a, b) (((a) < (b)) ? (a) : (b)) #define max(a, b) (((a) > (b)) ? (a) : (b)) +#ifdef HAVE_HDF5 +void read_hdf5_int(hid_t file_id, const char *name, int *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} + +void read_hdf5(hid_t file_id, const char *name, double *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} +#endif /* Query sizes and dimensions of state arrays */ void query_state(int *klon, int *klev) { +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* globalMetainfo = serialboxSerializerGetGlobalMetainfo(serializer); @@ -28,6 +53,17 @@ void query_state(int *klon, int *klev) serialboxMetainfoDestroy(globalMetainfo); serialboxSerializerDestroy(serializer); +#endif +#ifdef HAVE_HDF5 + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + read_hdf5_int(file_id, "/KLEV", klev); + read_hdf5_int(file_id, "/KLON", klon); + + status = H5Fclose(file_id); +#endif } void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) @@ -147,7 +183,7 @@ void expand_3d(double *buffer_in, double *field_in, int nlon, int nlev, int nclv } - +#ifdef HAVE_SERIALBOX void load_and_expand_1d(serialboxSerializer_t *serializer, serialboxSavepoint_t* savepoint, const char *name, int nlon, int nproma, int ngptot, int nblocks, double *field) { @@ -187,6 +223,63 @@ void load_and_expand_3d(serialboxSerializer_t *serializer, serialboxSavepoint_t* serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 3); expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); } +#endif + +#if HAVE_HDF5 +void load_and_expand_1d(hid_t file_id, const char *name, int nlon, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nlon]; + int strides[1] = {1}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); +} + +void load_and_expand_1d_int(hid_t file_id, const char *name, int nlon, int nproma, int ngptot, int nblocks, int *field) +{ + int buffer[nlon]; + int strides[1] = {1}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + //expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); + //serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 1); + expand_1d_int((int *)buffer, field, nlon, nproma, ngptot, nblocks); +} + +void load_and_expand_2d(hid_t file_id, const char *name, int nlon, int nlev, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nlev][nlon]; + int strides[2] = {1, nlon}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + //expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); + //serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 2); + expand_2d((double *)buffer, field, nlon, nlev, nproma, ngptot, nblocks); +} + +void load_and_expand_3d(hid_t file_id, const char *name, int nlon, int nlev, int nclv, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nclv][nlev][nlon]; + int strides[3] = {1, nlon, nlev*nlon}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + //expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); + //expand_2d((double *)buffer, field, nlon, nlev, nproma, ngptot, nblocks); + expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); +} +#endif /* Read input state into memory */ void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, @@ -199,13 +292,15 @@ void load_state(const int nlon, const int nlev, const int nclv, const int ngptot int* ktype, double* plu, double* plude, double* psnde, double* pmfu, double* pmfd, double* pa, double* pclv, double* psupsat) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLCRIT_AER", nlon, nlev, nproma, ngptot, nblocks, plcrit_aer); load_and_expand_2d(serializer, savepoint, "PICRIT_AER", nlon, nlev, nproma, ngptot, nblocks, picrit_aer); load_and_expand_2d(serializer, savepoint, "PRE_ICE", nlon, nlev, nproma, ngptot, nblocks, pre_ice); @@ -401,6 +496,209 @@ void load_state(const int nlon, const int nlev, const int nclv, const int ngptot serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLCRIT_AER", nlon, nlev, nproma, ngptot, nblocks, plcrit_aer); + load_and_expand_2d(file_id, "PICRIT_AER", nlon, nlev, nproma, ngptot, nblocks, picrit_aer); + load_and_expand_2d(file_id, "PRE_ICE", nlon, nlev, nproma, ngptot, nblocks, pre_ice); + load_and_expand_2d(file_id, "PCCN", nlon, nlev, nproma, ngptot, nblocks, pccn); + load_and_expand_2d(file_id, "PNICE", nlon, nlev, nproma, ngptot, nblocks, pnice); + load_and_expand_2d(file_id, "PT", nlon, nlev, nproma, ngptot, nblocks, pt); + load_and_expand_2d(file_id, "PQ", nlon, nlev, nproma, ngptot, nblocks, pq); + load_and_expand_2d(file_id, "TENDENCY_CML_T", nlon, nlev, nproma, ngptot, nblocks, tend_cml_t); + load_and_expand_2d(file_id, "TENDENCY_CML_Q", nlon, nlev, nproma, ngptot, nblocks, tend_cml_q); + load_and_expand_2d(file_id, "TENDENCY_CML_A", nlon, nlev, nproma, ngptot, nblocks, tend_cml_a); + load_and_expand_3d(file_id, "TENDENCY_CML_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_cml_cld); + load_and_expand_2d(file_id, "TENDENCY_TMP_T", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_t); + load_and_expand_2d(file_id, "TENDENCY_TMP_Q", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_q); + load_and_expand_2d(file_id, "TENDENCY_TMP_A", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_a); + load_and_expand_3d(file_id, "TENDENCY_TMP_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_tmp_cld); + load_and_expand_2d(file_id, "PVFA", nlon, nlev, nproma, ngptot, nblocks, pvfa); + load_and_expand_2d(file_id, "PVFL", nlon, nlev, nproma, ngptot, nblocks, pvfl); + load_and_expand_2d(file_id, "PVFI", nlon, nlev, nproma, ngptot, nblocks, pvfi); + load_and_expand_2d(file_id, "PDYNA", nlon, nlev, nproma, ngptot, nblocks, pdyna); + load_and_expand_2d(file_id, "PDYNL", nlon, nlev, nproma, ngptot, nblocks, pdynl); + load_and_expand_2d(file_id, "PDYNI", nlon, nlev, nproma, ngptot, nblocks, pdyni); + load_and_expand_2d(file_id, "PHRSW", nlon, nlev, nproma, ngptot, nblocks, phrsw); + load_and_expand_2d(file_id, "PHRLW", nlon, nlev, nproma, ngptot, nblocks, phrlw); + load_and_expand_2d(file_id, "PVERVEL", nlon, nlev, nproma, ngptot, nblocks, pvervel); + load_and_expand_2d(file_id, "PAP", nlon, nlev, nproma, ngptot, nblocks, pap); + load_and_expand_2d(file_id, "PAPH", nlon, nlev+1, nproma, ngptot, nblocks, paph); + load_and_expand_1d(file_id, "PLSM", nlon, nproma, ngptot, nblocks, plsm); + load_and_expand_1d_int(file_id, "KTYPE", nlon, nproma, ngptot, nblocks, ktype); + load_and_expand_2d(file_id, "PLU", nlon, nlev, nproma, ngptot, nblocks, plu); + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PSNDE", nlon, nlev, nproma, ngptot, nblocks, psnde); + load_and_expand_2d(file_id, "PMFU", nlon, nlev, nproma, ngptot, nblocks, pmfu); + load_and_expand_2d(file_id, "PMFD", nlon, nlev, nproma, ngptot, nblocks, pmfd); + load_and_expand_2d(file_id, "PA", nlon, nlev, nproma, ngptot, nblocks, pa); + load_and_expand_3d(file_id, "PCLV", nlon, nlev, nclv, nproma, ngptot, nblocks, pclv); + load_and_expand_2d(file_id, "PSUPSAT", nlon, nlev, nproma, ngptot, nblocks, psupsat); + + read_hdf5(file_id, "/PTSPHY", ptsphy); + + read_hdf5(file_id, "/RG", &rg); + read_hdf5(file_id, "/RD", &rd); + read_hdf5(file_id, "/RCPD", &rcpd); + read_hdf5(file_id, "/RETV", &retv); + read_hdf5(file_id, "/RLVTT", &rlvtt); + read_hdf5(file_id, "/RLSTT", &rlstt); + read_hdf5(file_id, "/RLMLT", &rlmlt); + read_hdf5(file_id, "/RTT", &rtt); + read_hdf5(file_id, "/RV", &rv); + read_hdf5(file_id, "/R2ES", &r2es); + read_hdf5(file_id, "/R3LES", &r3les); + read_hdf5(file_id, "/R3IES", &r3ies); + read_hdf5(file_id, "/R4LES", &r4les); + read_hdf5(file_id, "/R4IES", &r4ies); + read_hdf5(file_id, "/R5LES", &r5les); + read_hdf5(file_id, "/R5IES", &r5ies); + read_hdf5(file_id, "/R5ALVCP", &r5alvcp); + read_hdf5(file_id, "/R5ALSCP", &r5alscp); + read_hdf5(file_id, "/RALVDCP", &ralvdcp); + read_hdf5(file_id, "/RALSDCP", &ralsdcp); + read_hdf5(file_id, "/RALFDCP", &ralfdcp); + read_hdf5(file_id, "/RTWAT", &rtwat); + read_hdf5(file_id, "/RTICE", &rtice); + read_hdf5(file_id, "/RTICECU", &rticecu); + read_hdf5(file_id, "/RTWAT_RTICE_R", &rtwat_rtice_r); + read_hdf5(file_id, "/RTWAT_RTICECU_R", &rtwat_rticecu_r); + read_hdf5(file_id, "/RKOOP1", &rkoop1); + read_hdf5(file_id, "/RKOOP2", &rkoop2); + + read_hdf5(file_id, "/YRECLDP_RAMID", &yrecldp->ramid); + read_hdf5(file_id, "/YRECLDP_RCLDIFF", &yrecldp->rcldiff); + read_hdf5(file_id, "/YRECLDP_RCLDIFF_CONVI", &yrecldp->rcldiff_convi); + read_hdf5(file_id, "/YRECLDP_RCLCRIT", &yrecldp->rclcrit); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_SEA", &yrecldp->rclcrit_sea); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_LAND", &yrecldp->rclcrit_land); + read_hdf5(file_id, "/YRECLDP_RKCONV", &yrecldp->rkconv); + read_hdf5(file_id, "/YRECLDP_RPRC1", &yrecldp->rprc1); + read_hdf5(file_id, "/YRECLDP_RPRC2", &yrecldp->rprc2); + read_hdf5(file_id, "/YRECLDP_RCLDMAX", &yrecldp->rcldmax); + read_hdf5(file_id, "/YRECLDP_RPECONS", &yrecldp->rpecons); + read_hdf5(file_id, "/YRECLDP_RVRFACTOR", &yrecldp->rvrfactor); + read_hdf5(file_id, "/YRECLDP_RPRECRHMAX", &yrecldp->rprecrhmax); + read_hdf5(file_id, "/YRECLDP_RTAUMEL", &yrecldp->rtaumel); + read_hdf5(file_id, "/YRECLDP_RAMIN", &yrecldp->ramin); + read_hdf5(file_id, "/YRECLDP_RLMIN", &yrecldp->rlmin); + read_hdf5(file_id, "/YRECLDP_RKOOPTAU", &yrecldp->rkooptau); + read_hdf5(file_id, "/YRECLDP_RCLDTOPP", &yrecldp->rcldtopp); + read_hdf5(file_id, "/YRECLDP_RLCRITSNOW", &yrecldp->rlcritsnow); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN1", &yrecldp->rsnowlin1); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN2", &yrecldp->rsnowlin2); + read_hdf5(file_id, "/YRECLDP_RICEHI1", &yrecldp->ricehi1); + read_hdf5(file_id, "/YRECLDP_RICEHI2", &yrecldp->ricehi2); + read_hdf5(file_id, "/YRECLDP_RICEINIT", &yrecldp->riceinit); + read_hdf5(file_id, "/YRECLDP_RVICE", &yrecldp->rvice); + read_hdf5(file_id, "/YRECLDP_RVRAIN", &yrecldp->rvrain); + read_hdf5(file_id, "/YRECLDP_RVSNOW", &yrecldp->rvsnow); + read_hdf5(file_id, "/YRECLDP_RTHOMO", &yrecldp->rthomo); + read_hdf5(file_id, "/YRECLDP_RCOVPMIN", &yrecldp->rcovpmin); + read_hdf5(file_id, "/YRECLDP_RCCN", &yrecldp->rccn); + read_hdf5(file_id, "/YRECLDP_RNICE", &yrecldp->rnice); + read_hdf5(file_id, "/YRECLDP_RCCNOM", &yrecldp->rccnom); + read_hdf5(file_id, "/YRECLDP_RCCNSS", &yrecldp->rccnss); + read_hdf5(file_id, "/YRECLDP_RCCNSU", &yrecldp->rccnsu); + read_hdf5(file_id, "/YRECLDP_RCLDTOPCF", &yrecldp->rcldtopcf); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFRATE", &yrecldp->rdepliqrefrate); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFDEPTH", &yrecldp->rdepliqrefdepth); + read_hdf5(file_id, "/YRECLDP_RCL_KKAac", &yrecldp->rcl_kkaac); + read_hdf5(file_id, "/YRECLDP_RCL_KKBac", &yrecldp->rcl_kkbac); + read_hdf5(file_id, "/YRECLDP_RCL_KKAau", &yrecldp->rcl_kkaau); + read_hdf5(file_id, "/YRECLDP_RCL_KKBauq", &yrecldp->rcl_kkbauq); + read_hdf5(file_id, "/YRECLDP_RCL_KKBaun", &yrecldp->rcl_kkbaun); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_sea", &yrecldp->rcl_kk_cloud_num_sea); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_land", &yrecldp->rcl_kk_cloud_num_land); + read_hdf5(file_id, "/YRECLDP_RCL_AI", &yrecldp->rcl_ai); + read_hdf5(file_id, "/YRECLDP_RCL_BI", &yrecldp->rcl_bi); + read_hdf5(file_id, "/YRECLDP_RCL_CI", &yrecldp->rcl_ci); + read_hdf5(file_id, "/YRECLDP_RCL_DI", &yrecldp->rcl_di); + read_hdf5(file_id, "/YRECLDP_RCL_X1I", &yrecldp->rcl_x1i); + read_hdf5(file_id, "/YRECLDP_RCL_X2I", &yrecldp->rcl_x2i); + read_hdf5(file_id, "/YRECLDP_RCL_X3I", &yrecldp->rcl_x3i); + read_hdf5(file_id, "/YRECLDP_RCL_X4I", &yrecldp->rcl_x4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1I", &yrecldp->rcl_const1i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2I", &yrecldp->rcl_const2i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3I", &yrecldp->rcl_const3i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4I", &yrecldp->rcl_const4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5I", &yrecldp->rcl_const5i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6I", &yrecldp->rcl_const6i); + read_hdf5(file_id, "/YRECLDP_RCL_APB1", &yrecldp->rcl_apb1); + read_hdf5(file_id, "/YRECLDP_RCL_APB2", &yrecldp->rcl_apb2); + read_hdf5(file_id, "/YRECLDP_RCL_APB3", &yrecldp->rcl_apb3); + read_hdf5(file_id, "/YRECLDP_RCL_AS", &yrecldp->rcl_as); + read_hdf5(file_id, "/YRECLDP_RCL_BS", &yrecldp->rcl_bs); + read_hdf5(file_id, "/YRECLDP_RCL_CS", &yrecldp->rcl_cs); + read_hdf5(file_id, "/YRECLDP_RCL_DS", &yrecldp->rcl_ds); + read_hdf5(file_id, "/YRECLDP_RCL_X1S", &yrecldp->rcl_x1s); + read_hdf5(file_id, "/YRECLDP_RCL_X2S", &yrecldp->rcl_x2s); + read_hdf5(file_id, "/YRECLDP_RCL_X3S", &yrecldp->rcl_x3s); + read_hdf5(file_id, "/YRECLDP_RCL_X4S", &yrecldp->rcl_x4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1S", &yrecldp->rcl_const1s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2S", &yrecldp->rcl_const2s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3S", &yrecldp->rcl_const3s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4S", &yrecldp->rcl_const4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5S", &yrecldp->rcl_const5s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6S", &yrecldp->rcl_const6s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST7S", &yrecldp->rcl_const7s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST8S", &yrecldp->rcl_const8s); + read_hdf5(file_id, "/YRECLDP_RDENSWAT", &yrecldp->rdenswat); + read_hdf5(file_id, "/YRECLDP_RDENSREF", &yrecldp->rdensref); + read_hdf5(file_id, "/YRECLDP_RCL_AR", &yrecldp->rcl_ar); + read_hdf5(file_id, "/YRECLDP_RCL_BR", &yrecldp->rcl_br); + read_hdf5(file_id, "/YRECLDP_RCL_CR", &yrecldp->rcl_cr); + read_hdf5(file_id, "/YRECLDP_RCL_DR", &yrecldp->rcl_dr); + read_hdf5(file_id, "/YRECLDP_RCL_X1R", &yrecldp->rcl_x1r); + read_hdf5(file_id, "/YRECLDP_RCL_X2R", &yrecldp->rcl_x2r); + read_hdf5(file_id, "/YRECLDP_RCL_X4R", &yrecldp->rcl_x4r); + read_hdf5(file_id, "/YRECLDP_RCL_KA273", &yrecldp->rcl_ka273); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM1", &yrecldp->rcl_cdenom1); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM2", &yrecldp->rcl_cdenom2); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM3", &yrecldp->rcl_cdenom3); + read_hdf5(file_id, "/YRECLDP_RCL_SCHMIDT", &yrecldp->rcl_schmidt); + read_hdf5(file_id, "/YRECLDP_RCL_DYNVISC", &yrecldp->rcl_dynvisc); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1R", &yrecldp->rcl_const1r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2R", &yrecldp->rcl_const2r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3R", &yrecldp->rcl_const3r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4R", &yrecldp->rcl_const4r); + read_hdf5(file_id, "/YRECLDP_RCL_FAC1", &yrecldp->rcl_fac1); + read_hdf5(file_id, "/YRECLDP_RCL_FAC2", &yrecldp->rcl_fac2); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5R", &yrecldp->rcl_const5r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6R", &yrecldp->rcl_const6r); + read_hdf5(file_id, "/YRECLDP_RCL_FZRAB", &yrecldp->rcl_fzrab); + read_hdf5(file_id, "/YRECLDP_RCL_FZRBB", &yrecldp->rcl_fzrbb); + read_hdf5_int(file_id, "/YRECLDP_LCLDEXTRA", &yrecldp->lcldextra); // Bool + read_hdf5_int(file_id, "/YRECLDP_LCLDBUDGET", &yrecldp->lcldbudget); // Bool + read_hdf5_int(file_id, "/YRECLDP_NSSOPT", &yrecldp->nssopt); + read_hdf5_int(file_id, "/YRECLDP_NCLDTOP", &yrecldp->ncldtop); + read_hdf5_int(file_id, "/YRECLDP_NAECLBC", &yrecldp->naeclbc); + read_hdf5_int(file_id, "/YRECLDP_NAECLDU", &yrecldp->naecldu); + read_hdf5_int(file_id, "/YRECLDP_NAECLOM", &yrecldp->naeclom); + read_hdf5_int(file_id, "/YRECLDP_NAECLSS", &yrecldp->naeclss); + read_hdf5_int(file_id, "/YRECLDP_NAECLSU", &yrecldp->naeclsu); + read_hdf5_int(file_id, "/YRECLDP_NCLDDIAG", &yrecldp->nclddiag); + read_hdf5_int(file_id, "/YRECLDP_NAERCLD", &yrecldp->naercld); + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOLSP", &yrecldp->laerliqautolsp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCP", &yrecldp->laerliqautocp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCPB", &yrecldp->laerliqautocpb); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQCOLL", &yrecldp->laerliqcoll); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICESED", &yrecldp->laericesed); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICEAUTO", &yrecldp->laericeauto); // Bool + read_hdf5(file_id, "/YRECLDP_NSHAPEP", &yrecldp->nshapep); + read_hdf5(file_id, "/YRECLDP_NSHAPEQ", &yrecldp->nshapeq); + read_hdf5_int(file_id, "/YRECLDP_NBETA", &yrecldp->nbeta); + + status = H5Fclose(file_id); + +#endif + } @@ -412,13 +710,15 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "reference", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); load_and_expand_2d(serializer, savepoint, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); load_and_expand_1d(serializer, savepoint, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); @@ -443,4 +743,37 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(REFERENCE_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); + load_and_expand_1d(file_id, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); + load_and_expand_2d(file_id, "PFSQLF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqlf); + load_and_expand_2d(file_id, "PFSQIF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqif); + load_and_expand_2d(file_id, "PFCQLNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqlng); + load_and_expand_2d(file_id, "PFCQNNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqnng); + load_and_expand_2d(file_id, "PFSQRF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqrf); + load_and_expand_2d(file_id, "PFSQSF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqsf); + load_and_expand_2d(file_id, "PFCQRNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqrng); + load_and_expand_2d(file_id, "PFCQSNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqsng); + load_and_expand_2d(file_id, "PFSQLTUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqltur); + load_and_expand_2d(file_id, "PFSQITUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqitur); + load_and_expand_2d(file_id, "PFPLSL", nlon, nlev+1, nproma, ngptot, nblocks, pfplsl); + load_and_expand_2d(file_id, "PFPLSN", nlon, nlev+1, nproma, ngptot, nblocks, pfplsn); + load_and_expand_2d(file_id, "PFHPSL", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsl); + load_and_expand_2d(file_id, "PFHPSN", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsn); + load_and_expand_2d(file_id, "TENDENCY_LOC_T", nlon, nlev, nproma, ngptot, nblocks, tend_loc_t); + load_and_expand_2d(file_id, "TENDENCY_LOC_Q", nlon, nlev, nproma, ngptot, nblocks, tend_loc_q); + load_and_expand_2d(file_id, "TENDENCY_LOC_A", nlon, nlev, nproma, ngptot, nblocks, tend_loc_a); + load_and_expand_3d(file_id, "TENDENCY_LOC_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_loc_cld); + + status = H5Fclose(file_id); +#endif + } From ff8323941d1964630e87c4800e36ac60fc6cb910 Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Mon, 11 Dec 2023 11:31:28 +0200 Subject: [PATCH 115/174] add arch files for Lumi cray GPU cce 16.0.1 --- arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh | 50 +++++++++++++++++++ .../lumi/cray-gpu/16.0.1/toolchain.cmake | 43 ++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh create mode 100644 arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh new file mode 100644 index 00000000..8d9504eb --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh @@ -0,0 +1,50 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload to be certain +module reset + +# Load modules +module_load PrgEnv-cray/8.3.3 +module_load LUMI/23.03 +module_load rocm/5.2.3 +module_load cce/16.0.1 +module_load cray-libsci/22.08.1.1 +module_load cray-mpich/8.1.18 +module_load craype/2.7.23 +module_load craype-accel-amd-gfx90a +module_load buildtools/23.03 +module_load cray-hdf5/1.12.1.5 +module_load cray-python/3.9.12.1 +module_load Boost/1.81.0-cpeCray-23.03 +module_load partition/G + +module list + +set -x + +export CC=cc CXX=CC FC=ftn + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake new file mode 100644 index 00000000..63de4a48 --- /dev/null +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake @@ -0,0 +1,43 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI OFF ) +set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) + +#################################################################### +# OpenMP FLAGS +#################################################################### + +set( ENABLE_OMP ON CACHE STRING "" ) +set( OpenMP_C_FLAGS "-homp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-homp -hnoacc -hlist=aimd -maxrregcount 64" CACHE STRING "" ) + +#################################################################### +# OpenACC FLAGS +#################################################################### + +set( ENABLE_ACC ON CACHE STRING "" ) +set( OpenACC_C_FLAGS "-hacc" ) +set( OpenACC_CXX_FLAGS "-hacc" ) +set( OpenACC_Fortran_FLAGS "-hacc") # -h acc_model=deep_copy" ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) +set(ECBUILD_Fortran_FLAGS "-hcontiguous") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") + +# set(ECBUILD_Fortran_FLAGS_BIT "-O3 -G2 -haggress -DNDEBUG") +set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") From 25bb5276733f92ce8b6119dfff2964add16d7bdc Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Tue, 12 Dec 2023 16:34:18 +0200 Subject: [PATCH 116/174] adding col/s as metric for HIP variants --- src/cloudsc_hip/cloudsc/cloudsc_driver.cpp | 18 +++++++++++------- .../cloudsc/cloudsc_driver_hoist.cpp | 18 +++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp b/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp index c03a877d..dc7efc79 100644 --- a/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver.cpp @@ -457,9 +457,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { double t2 = omp_get_wtime(); printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); - printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", - "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); - double zfrac, zmflops; + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; for (int t = 0; t < numthreads; t++) { const double tloc = zinfo[0][t]; const int coreid = (int) zinfo[1][t]; @@ -468,21 +468,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zfrac = (double)igpc / (double)numcols; if (tloc > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; } else { zmflops = 0.; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", - numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); } double tdiff = t2 - t1; zfrac = 1.0; if (tdiff > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; } else { zmflops = 0.0; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", - numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, diff --git a/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp index 657bed97..d8090966 100644 --- a/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp +++ b/src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp @@ -498,9 +498,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { double t2 = omp_get_wtime(); printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); - printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", - "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); - double zfrac, zmflops; + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; for (int t = 0; t < numthreads; t++) { const double tloc = zinfo[0][t]; const int coreid = (int) zinfo[1][t]; @@ -509,21 +509,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zfrac = (double)igpc / (double)numcols; if (tloc > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; } else { zmflops = 0.; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", - numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); } double tdiff = t2 - t1; zfrac = 1.0; if (tdiff > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; } else { zmflops = 0.0; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", - numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, From a471d1cfe89741ef8b99f614a89bd955b7e8f534 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Tue, 12 Dec 2023 15:10:23 +0000 Subject: [PATCH 117/174] Introducing SYCL implementations/variants (SCC, SCC-HOIST, SCC-K-CACHING) --- bundle.yml | 5 + src/CMakeLists.txt | 1 + src/cloudsc_sycl/CMakeLists.txt | 144 + src/cloudsc_sycl/cloudsc/cloudsc_c.kernel | 2633 ++++++++++++++++ .../cloudsc/cloudsc_c_hoist.kernel | 2651 +++++++++++++++++ .../cloudsc/cloudsc_c_k_caching.kernel | 2631 ++++++++++++++++ src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp | 619 ++++ src/cloudsc_sycl/cloudsc/cloudsc_driver.h | 20 + .../cloudsc/cloudsc_driver_hoist.cpp | 678 +++++ .../cloudsc/cloudsc_driver_hoist.h | 20 + .../cloudsc/cloudsc_driver_k_caching.cpp | 617 ++++ src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp | 245 ++ src/cloudsc_sycl/cloudsc/cloudsc_validate.h | 18 + src/cloudsc_sycl/cloudsc/load_state.cpp | 457 +++ src/cloudsc_sycl/cloudsc/load_state.h | 40 + src/cloudsc_sycl/cloudsc/mycpu.cpp | 31 + src/cloudsc_sycl/cloudsc/mycpu.h | 11 + src/cloudsc_sycl/cloudsc/yoecldp_c.h | 145 + src/cloudsc_sycl/dwarf_cloudsc.cpp | 44 + 19 files changed, 11010 insertions(+) create mode 100644 src/cloudsc_sycl/CMakeLists.txt create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_c.kernel create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_driver.h create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.h create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp create mode 100644 src/cloudsc_sycl/cloudsc/cloudsc_validate.h create mode 100644 src/cloudsc_sycl/cloudsc/load_state.cpp create mode 100644 src/cloudsc_sycl/cloudsc/load_state.h create mode 100644 src/cloudsc_sycl/cloudsc/mycpu.cpp create mode 100644 src/cloudsc_sycl/cloudsc/mycpu.h create mode 100644 src/cloudsc_sycl/cloudsc/yoecldp_c.h create mode 100644 src/cloudsc_sycl/dwarf_cloudsc.cpp diff --git a/bundle.yml b/bundle.yml index f0340013..35f65f18 100644 --- a/bundle.yml +++ b/bundle.yml @@ -115,6 +115,11 @@ options : cmake: > ENABLE_HIP=ON + - with-sycl : + help: Enable GPU kernel variant based on HIP + cmake: > + ENABLE_SYCL=ON + - with-mpi : help : Enable MPI-parallel kernel cmake : ENABLE_MPI=ON diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e63079c4..637da0ec 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,5 +15,6 @@ add_subdirectory(cloudsc_python) add_subdirectory(cloudsc_c) add_subdirectory(cloudsc_cuda) add_subdirectory(cloudsc_hip) +add_subdirectory(cloudsc_sycl) add_subdirectory(cloudsc_gpu) add_subdirectory(cloudsc_loki) diff --git a/src/cloudsc_sycl/CMakeLists.txt b/src/cloudsc_sycl/CMakeLists.txt new file mode 100644 index 00000000..8929a614 --- /dev/null +++ b/src/cloudsc_sycl/CMakeLists.txt @@ -0,0 +1,144 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Define this dwarf variant as an ECBuild feature +ecbuild_add_option( FEATURE CLOUDSC_SYCL + DESCRIPTION "Build the SYCL version CLOUDSC using Serialbox" DEFAULT ON + CONDITION Serialbox_FOUND AND HAVE_SYCL +) + +if( HAVE_CLOUDSC_SYCL ) + + set(LINK_SYCL OFF) + if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") + set(LINK_SYCL ON) + endif() + + ecbuild_add_library( + TARGET dwarf-cloudsc-scc-sycl-lib + INSTALL_HEADERS LISTED + SOURCES + cloudsc/yoecldp_c.h + cloudsc/load_state.h + cloudsc/load_state.cpp + cloudsc/cloudsc_c.kernel + cloudsc/cloudsc_driver.h + cloudsc/cloudsc_driver.cpp + cloudsc/cloudsc_validate.h + cloudsc/cloudsc_validate.cpp + cloudsc/mycpu.h + cloudsc/mycpu.cpp + PUBLIC_INCLUDES + $ + $ + PUBLIC_LIBS + $<$:sycl> + Serialbox::Serialbox_C + $<${HAVE_OMP}:OpenMP::OpenMP_C> + ) + + + ecbuild_add_executable( + TARGET dwarf-cloudsc-scc-sycl + SOURCES dwarf_cloudsc.cpp + LIBS dwarf-cloudsc-scc-sycl-lib + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-sycl-serial + COMMAND bin/dwarf-cloudsc-scc-sycl + ARGS 1 1000 128 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + + ###### + + ecbuild_add_library( + TARGET dwarf-cloudsc-scc-hoist-sycl-lib + INSTALL_HEADERS LISTED + SOURCES + cloudsc/yoecldp_c.h + cloudsc/load_state.h + cloudsc/load_state.cpp + cloudsc/cloudsc_c_hoist.kernel + cloudsc/cloudsc_driver_hoist.h + cloudsc/cloudsc_driver_hoist.cpp + cloudsc/cloudsc_validate.h + cloudsc/cloudsc_validate.cpp + cloudsc/mycpu.h + cloudsc/mycpu.cpp + PUBLIC_INCLUDES + $ + $ + PUBLIC_LIBS + $<$:sycl> + Serialbox::Serialbox_C + $<${HAVE_OMP}:OpenMP::OpenMP_C> + ) + + ecbuild_add_executable( + TARGET dwarf-cloudsc-scc-hoist-sycl + SOURCES dwarf_cloudsc.cpp + LIBS dwarf-cloudsc-scc-hoist-sycl-lib + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-sycl-hoist-serial + COMMAND bin/dwarf-cloudsc-scc-hoist-sycl + ARGS 1 1000 128 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + + ###### + + ecbuild_add_library( + TARGET dwarf-cloudsc-scc-k-caching-sycl-lib + INSTALL_HEADERS LISTED + SOURCES + cloudsc/yoecldp_c.h + cloudsc/load_state.h + cloudsc/load_state.cpp + cloudsc/cloudsc_c_k_caching.kernel + cloudsc/cloudsc_driver.h + cloudsc/cloudsc_driver_k_caching.cpp + cloudsc/cloudsc_validate.h + cloudsc/cloudsc_validate.cpp + cloudsc/mycpu.h + cloudsc/mycpu.cpp + PUBLIC_INCLUDES + $ + $ + PUBLIC_LIBS + $<$:sycl> + Serialbox::Serialbox_C + $<${HAVE_OMP}:OpenMP::OpenMP_C> + ) + + ecbuild_add_executable( + TARGET dwarf-cloudsc-scc-k-caching-sycl + SOURCES dwarf_cloudsc.cpp + LIBS dwarf-cloudsc-scc-k-caching-sycl-lib + ) + + ecbuild_add_test( + TARGET dwarf-cloudsc-sycl-k-caching-serial + COMMAND bin/dwarf-cloudsc-scc-k-caching-sycl + ARGS 1 1000 128 + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. + OMP 1 + ) + + # Create symlink for the input data + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + +else() + ecbuild_info( "Serialbox not found, disabling SYCL version" ) +endif() diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel b/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel new file mode 100644 index 00000000..cb0926bd --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel @@ -0,0 +1,2633 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#include +#include +#include + +void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2, + cl::sycl:: stream out_stream, cl::sycl::nd_item<1> item_ct) { + + //------------------------------------------------------------------------------- + // Declare input/output arguments + //------------------------------------------------------------------------------- + + // PLCRIT_AER : critical liquid mmr for rain autoconversion process + // PICRIT_AER : critical liquid mmr for snow autoconversion process + // PRE_LIQ : liq Re + // PRE_ICE : ice Re + // PCCN : liquid cloud condensation nuclei + // PNICE : ice number concentration (cf. CCN) + + const int klev = 137; // Number of levels + + double zlcond1, zlcond2, zlevapl, zlevapi, zrainaut, zsnowaut, zliqcld, zicecld; + double zlevap, zleros; + // condensation and evaporation terms + // autoconversion terms + double zfokoop; + double zfoealfa[klev + 1]; + double zicenuclei; // number concentration of ice nuclei + + double zlicld; + double zacond; + double zaeros; + double zlfinalsum; + double zdqs; + double ztold; + double zqold; + double zdtgdp; + double zrdtgdp; + double ztrpaus; + double zcovpclr; + double zpreclr; + double zcovptot; + double zcovpmax; + double zqpretot; + double zdpevap; + double zdtforc; + double zdtdiab; + double ztp1[klev]; + double zldefr; + double zldifdt; + double zdtgdpf; + double zlcust[5]; + double zacust; + double zmf; + + double zrho; + double ztmp1, ztmp2, ztmp3; + double ztmp4, ztmp5, ztmp6, ztmp7; + double zalfawm; + + // Accumulators of A,B,and C factors for cloud equations + double zsolab; // -ve implicit CC + double zsolac; // linear CC + double zanew; + double zanewm1; + + double zgdp; + + //---for flux calculation + double zda; + double zli[klev], za[klev]; + double zaorig[klev]; // start of scheme value for CC + + int llflag; + int llo1; + + int icall, ik, jk, jl, jm, jn, jo, jlen, is; + + double zdp, zpaphd; + + double zalfa; + // & ZALFACU, ZALFALS + double zalfaw; + double zbeta, zbeta1; + //REAL(KIND=JPRB) :: ZBOTT + double zcfpr; + double zcor; + double zcdmax; + double zmin; + double zlcondlim; + double zdenom; + double zdpmxdt; + double zdpr; + double zdtdp; + double ze; + double zepsec; + double zfac, zfaci, zfacw; + double zgdcp; + double zinew; + double zlcrit; + double zmfdn; + double zprecip; + double zqe; + double zqsat, zqtmst, zrdcp; + double zrhc, zsig, zsigk; + double zwtot; + double zzco, zzdl, zzrh, zzzdt, zqadj; + double zqnew, ztnew; + double zrg_r, zgdph_r, zcons1, zcond, zcons1a; + double zlfinal; + double zmelt; + double zevap; + double zfrz; + double zvpliq, zvpice; + double zadd, zbdd, zcvds, zice0, zdepos; + double zsupsat; + double zfall; + double zre_ice; + double zrldcp; + double zqp1env; + + //---------------------------- + // Arrays for new microphysics + //---------------------------- + int iphase[5]; // marker for water phase of each species + // 0=vapour, 1=liquid, 2=ice + + int imelt[5]; // marks melting linkage for ice categories + // ice->liquid, snow->rain + + int llfall[5]; // marks falling species + // LLFALL=0, cloud cover must > 0 for zqx > 0 + // LLFALL=1, no cloud needed, zqx can evaporate + + int llindex1[5]; // index variable + int llindex3[5 * 5]; // index variable + double zmax; + double zrat; + int iorder[5]; // array for sorting explicit terms + + double zliqfrac[klev]; // cloud liquid water fraction: ql/(ql+qi) + double zicefrac[klev]; // cloud ice water fraction: qi/(ql+qi) + double zqxn[5]; // new values for zqx at time+1 + double zqxfg[5]; // first guess values including precip + double zqxnm1[5]; // new values for zqx at time+1 at level above + double zfluxq[5]; // fluxes convergence of species (needed?) + // Keep the following for possible future total water variance scheme? + //REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + //REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + //REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + //REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + //REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + double zmeltmax; + double zfrzmax; + double zicetot; + + + double zqsmix[klev]; // diagnostic mixed phase saturation + //REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + double zqsliq[klev]; // liquid water saturation + double zqsice[klev]; // ice water saturation + + //REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + //REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + //REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + double zfoeewmt[klev]; + double zfoeew[klev]; + double zfoeeliqt[klev]; + //REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + double zdqsliqdt, zdqsicedt, zdqsmixdt; + double zcorqsliq; + double zcorqsice; + //REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + double zcorqsmix; + double zevaplimliq, zevaplimice, zevaplimmix; + + //------------------------------------------------------- + // SOURCE/SINK array for implicit and explicit terms + //------------------------------------------------------- + // a POSITIVE value entered into the arrays is a... + // Source of this variable + // | + // | Sink of this variable + // | | + // V V + // ZSOLQA(JL,IQa,IQb) = explicit terms + // ZSOLQB(JL,IQa,IQb) = implicit terms + // Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + // a source of NCLDQL and a sink of IQV + // put 'magic' source terms such as PLUDE from + // detrainment into explicit source/sink array diagnognal + // ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + // i.e. A positive value is a sink!????? weird... + //------------------------------------------------------- + + double zsolqa[5 * 5]; // explicit sources and sinks + double zsolqb[5 * 5]; // implicit sources and sinks + // e.g. microphysical pathways between ice variables. + double zqlhs[5 * 5]; // n x n matrix storing the LHS of implicit solver + double zvqx[5]; // fall speeds of three categories + double zexplicit; + double zratio[5], zsinksum[5]; + + // for sedimentation source/sink terms + double zfallsink[5]; + double zfallsrce[5]; + + // for convection detrainment source and subsidence source/sink terms + double zconvsrce[5]; + double zconvsink[5]; + + // for supersaturation source term from previous timestep + double zpsupsatsrce[5]; + + // Numerical fit to wet bulb temperature + double ztw1 = (double) 1329.31; + double ztw2 = (double) 0.0074615; + double ztw3 = (double) 0.85E5; + double ztw4 = (double) 40.637; + double ztw5 = (double) 275.0; + + double zsubsat; // Subsaturation for snow melting term + double ztdmtw0; // Diff between dry-bulb temperature and + // temperature when wet-bulb = 0degC + + // Variables for deposition term + double ztcg; // Temperature dependent function for ice PSD + double zfacx1i, zfacx1s; // PSD correction factor + double zaplusb, zcorrfac, zcorrfac2, zpr02, zterm1, zterm2; // for ice dep + double zcldtopdist; // Distance from cloud top + double zinfactor; // No. of ice nuclei factor for deposition + + // Autoconversion/accretion/riming/evaporation + int iwarmrain; + int ievaprain; + int ievapsnow; + int idepice; + double zrainacc; + double zraincld; + double zsnowrime; + double zsnowcld; + double zesatliq; + double zfallcorr; + double zlambda; + double zevap_denom; + double zcorr2; + double zka; + double zconst; + double ztemp; + + // Rain freezing + int llrainliq; // True if majority of raindrops are liquid (no ice core) + + //---------------------------- + // End: new microphysics + //---------------------------- + + //---------------------- + // SCM budget statistics + //---------------------- + double zrain; + + double zhook_handle; + double ztmpl, ztmpi, ztmpa; + + double zmm, zrr; + double zrg; + + double zzsum, zzratio; + double zepsilon; + + double zcond1, zqp; + + double psum_solqa; + int ibl; + int i_llfall_0; + double zqx[5 * klev]; + double zqx0[5 * klev]; + double zpfplsx[5 * (klev + 1)]; + double zlneg[5 * klev]; + double zqxn2d[5 * klev]; + + jl = item_ct.get_local_id(0); //threadIdx.x; + ibl = item_ct.get_group(0); // or 2? blockIdx.z; + + + //=============================================================================== + //IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) + + //=============================================================================== + // 0.0 Beginning of timestep book-keeping + //---------------------------------------------------------------------- + + + //###################################################################### + // 0. *** SET UP CONSTANTS *** + //###################################################################### + + zepsilon = (double) 100.*std::numeric_limits::epsilon(); //DBL_EPSILON; + + // --------------------------------------------------------------------- + // Set version of warm-rain autoconversion/accretion + // IWARMRAIN = 1 ! Sundquist + // IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + // --------------------------------------------------------------------- + iwarmrain = 2; + // --------------------------------------------------------------------- + // Set version of rain evaporation + // IEVAPRAIN = 1 ! Sundquist + // IEVAPRAIN = 2 ! Abel and Boutle (2013) + // --------------------------------------------------------------------- + ievaprain = 2; + // --------------------------------------------------------------------- + // Set version of snow evaporation + // IEVAPSNOW = 1 ! Sundquist + // IEVAPSNOW = 2 ! New + // --------------------------------------------------------------------- + ievapsnow = 1; + // --------------------------------------------------------------------- + // Set version of ice deposition + // IDEPICE = 1 ! Rotstayn (2001) + // IDEPICE = 2 ! New + // --------------------------------------------------------------------- + idepice = 1; + + // --------------------- + // Some simple constants + // --------------------- + zqtmst = (double) 1.0 / ptsphy; + zgdcp = rg / rcpd; + zrdcp = rd / rcpd; + zcons1a = rcpd / (rlmlt*rg*(*yrecldp).rtaumel); + zepsec = (double) 1.E-14; + zrg_r = (double) 1.0 / rg; + zrldcp = (double) 1.0 / (ralsdcp - ralvdcp); + + // Note: Defined in module/yoecldp.F90 + // NCLDQL=1 ! liquid cloud water + // NCLDQI=2 ! ice cloud water + // NCLDQR=3 ! rain water + // NCLDQS=4 ! snow + // NCLDQV=5 ! vapour + + // ----------------------------------------------- + // Define species phase, 0=vapour, 1=liquid, 2=ice + // ----------------------------------------------- + iphase[4] = 0; + iphase[0] = 1; + iphase[2] = 1; + iphase[1] = 2; + iphase[3] = 2; + + // --------------------------------------------------- + // Set up melting/freezing index, + // if an ice category melts/freezes, where does it go? + // --------------------------------------------------- + imelt[4] = -99; + imelt[0] = 2; + imelt[2] = 4; + imelt[1] = 3; + imelt[3] = 3; + + // ----------------------------------------------- + // INITIALIZATION OF OUTPUT TENDENCIES + // ----------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + tendency_loc_a[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + } + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))] = (double) 0.0; + } + } + + //-- These were uninitialized : meaningful only when we compare error differences + for (jk = 0; jk <= klev + -1; jk += 1) { + pcovptot[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + tendency_loc_cld[jl + klon*(jk + klev*(4 + 5*(ibl)))] = (double) 0.0 + ; + } + + // ------------------------- + // set up fall speeds in m/s + // ------------------------- + zvqx[4] = (double) 0.0; + zvqx[0] = (double) 0.0; + zvqx[1] = (*yrecldp).rvice; + zvqx[2] = (*yrecldp).rvrain; + zvqx[3] = (*yrecldp).rvsnow; + for (i_llfall_0 = 0; i_llfall_0 <= 5 + -1; i_llfall_0 += 1) { + llfall[i_llfall_0] = false; + } + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (zvqx[jm] > (double) 0.0) { + llfall[jm] = true; + } + // falling species + } + // Set LLFALL to false for ice (but ice still sediments!) + // Need to rationalise this at some point + llfall[1] = false; + + + //###################################################################### + // 1. *** INITIAL VALUES FOR VARIABLES *** + //###################################################################### + + + // ---------------------- + // non CLV initialization + // ---------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + ztp1[jk] = pt[jl + klon*(jk + klev*(ibl))] + ptsphy*tendency_tmp_t[ + jl + klon*(jk + klev*(ibl))]; + zqx[jk + klev*(4)] = pq[jl + klon*(jk + klev*(ibl))] + + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*(ibl))]; + zqx0[jk + klev*(4)] = pq[jl + klon*(jk + klev*(ibl))] + + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*(ibl))]; + za[jk] = pa[jl + klon*(jk + klev*(ibl))] + ptsphy*tendency_tmp_a[jl + + klon*(jk + klev*(ibl))]; + zaorig[jk] = pa[jl + klon*(jk + klev*(ibl))] + ptsphy*tendency_tmp_a[ + jl + klon*(jk + klev*(ibl))]; + } + + // ------------------------------------- + // initialization for CLV family + // ------------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqx[jk + klev*jm] = pclv[jl + klon*(jk + klev*(jm + 5*(ibl)))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))]; + zqx0[jk + klev*jm] = pclv[jl + klon*(jk + klev*(jm + 5*(ibl)))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))]; + } + } + + //------------- + // zero arrays + //------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + zpfplsx[jk + (klev + 1)*jm] = (double) 0.0; // precip fluxes + } + } + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqxn2d[jk + klev*jm] = (double) 0.0; // end of timestep values in 2D + zlneg[jk + klev*jm] = (double) 0.0; // negative input check + } + } + + prainfrac_toprfz[jl + klon*(ibl)] = (double) 0.0; // rain fraction at top of refreezing layer + llrainliq = true; // Assume all raindrops are liquid initially + + // ---------------------------------------------------- + // Tidy up very small cloud cover or total cloud water + // ---------------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + if (zqx[jk + klev*(0)] + zqx[jk + klev*(1)] < (*yrecldp).rlmin || za[jk] + < (*yrecldp).ramin) { + + // Evaporate small cloud liquid water amounts + zlneg[jk + klev*(0)] = zlneg[jk + klev*(0)] + zqx[jk + klev*(0)]; + zqadj = zqx[jk + klev*(0)]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = + tendency_loc_q[jl + klon*(jk + klev*(ibl))] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralvdcp*zqadj; + zqx[jk + klev*(4)] = zqx[jk + klev*(4)] + zqx[jk + klev*(0)]; + zqx[jk + klev*(0)] = (double) 0.0; + + // Evaporate small cloud ice water amounts + zlneg[jk + klev*(1)] = zlneg[jk + klev*(1)] + zqx[jk + klev*(1)]; + zqadj = zqx[jk + klev*(1)]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = + tendency_loc_q[jl + klon*(jk + klev*(ibl))] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralsdcp*zqadj; + zqx[jk + klev*(4)] = zqx[jk + klev*(4)] + zqx[jk + klev*(1)]; + zqx[jk + klev*(1)] = (double) 0.0; + + // Set cloud cover to zero + za[jk] = (double) 0.0; + + } + } + + // --------------------------------- + // Tidy up small CLV variables + // --------------------------------- + //DIR$ IVDEP + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + //DIR$ IVDEP + for (jk = 0; jk <= klev + -1; jk += 1) { + //DIR$ IVDEP + if (zqx[jk + klev*jm] < (*yrecldp).rlmin) { + zlneg[jk + klev*jm] = zlneg[jk + klev*jm] + zqx[jk + klev*jm]; + zqadj = zqx[jk + klev*jm]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = + tendency_loc_q[jl + klon*(jk + klev*(ibl))] + zqadj; + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralvdcp*zqadj; + } + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = + tendency_loc_t[jl + klon*(jk + klev*(ibl))] - ralsdcp*zqadj; + } + zqx[jk + klev*(4)] = zqx[jk + klev*(4)] + zqx[jk + klev*jm]; + zqx[jk + klev*jm] = (double) 0.0; + } + } + } + + + // ------------------------------ + // Define saturation values + // ------------------------------ + for (jk = 0; jk <= klev + -1; jk += 1) { + //---------------------------------------- + // old *diagnostic* mixed phase saturation + //---------------------------------------- + zfoealfa[jk] = ((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))); + zfoeewmt[jk] = + cl::sycl::fmin(((double)(r2es*((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)) + (1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))) / pap[jl + klon*(jk + klev*(ibl))], (double) 0.5); + zqsmix[jk] = zfoeewmt[jk]; + zqsmix[jk] = zqsmix[jk] / ((double) 1.0 - retv*zqsmix[jk]); + + //--------------------------------------------- + // ice saturation T<273K + // liquid water saturation for T>273K + //--------------------------------------------- + zalfa = ((double)(cl::sycl::fmax(0.0, copysign(1.0, ztp1[jk] - rtt)))); + zfoeew[jk] = cl::sycl::fmin((zalfa*((double)(r2es*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))) + ((double) 1.0 - zalfa)*((double)(r2es*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))) / pap[jl + + klon*(jk + klev*(ibl))], (double) 0.5); + zfoeew[jk] = cl::sycl::fmin((double) 0.5, zfoeew[jk]); + zqsice[jk] = zfoeew[jk] / ((double) 1.0 - retv*zfoeew[jk]); + + //---------------------------------- + // liquid water saturation + //---------------------------------- + zfoeeliqt[jk] = + cl::sycl::fmin(((double)(r2es*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))) / pap[jl + klon*(jk + klev*(ibl))], (double) 0.5); + zqsliq[jk] = zfoeeliqt[jk]; + zqsliq[jk] = zqsliq[jk] / ((double) 1.0 - retv*zqsliq[jk]); + + // !---------------------------------- + // ! ice water saturation + // !---------------------------------- + // ZFOEEICET(JL,JK)=MIN(((double)(r2es*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + // ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + // ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + + } + + for (jk = 0; jk <= klev + -1; jk += 1) { + + + //------------------------------------------ + // Ensure cloud fraction is between 0 and 1 + //------------------------------------------ + za[jk] = cl::sycl::fmax((double) 0.0, cl::sycl::fmin((double) 1.0, za[jk])); + + //------------------------------------------------------------------- + // Calculate liq/ice fractions (no longer a diagnostic relationship) + //------------------------------------------------------------------- + zli[jk] = zqx[jk + klev*(0)] + zqx[jk + klev*(1)]; + if (zli[jk] > (*yrecldp).rlmin) { + zliqfrac[jk] = zqx[jk + klev*(0)] / zli[jk]; + zicefrac[jk] = (double) 1.0 - zliqfrac[jk]; + } else { + zliqfrac[jk] = (double) 0.0; + zicefrac[jk] = (double) 0.0; + } + + } + + //###################################################################### + // 2. *** CONSTANTS AND PARAMETERS *** + //###################################################################### + // Calculate L in updrafts of bl-clouds + // Specify QS, P/PS for tropopause (for c2) + // And initialize variables + //------------------------------------------ + + //--------------------------------- + // Find tropopause level (ZTRPAUS) + //--------------------------------- + ztrpaus = (double) 0.1; + zpaphd = (double) 1.0 / paph[jl + klon*(klev + (klev + 1)*(ibl))]; + for (jk = 0; jk <= klev - 1 + -1; jk += 1) { + zsig = pap[jl + klon*(jk + klev*(ibl))]*zpaphd; + if (zsig > (double) 0.1 && zsig < (double) 0.4 && ztp1[jk] > ztp1[1 + jk]) { + ztrpaus = zsig; + } + } + + //----------------------------- + // Reset single level variables + //----------------------------- + + zanewm1 = (double) 0.0; + zda = (double) 0.0; + zcovpclr = (double) 0.0; + zcovpmax = (double) 0.0; + zcovptot = (double) 0.0; + zcldtopdist = (double) 0.0; + + //###################################################################### + // 3. *** PHYSICS *** + //###################################################################### + + + //---------------------------------------------------------------------- + // START OF VERTICAL LOOP + //---------------------------------------------------------------------- + + for (jk = -1 + (*yrecldp).ncldtop; jk <= klev + -1; jk += 1) { + + //---------------------------------------------------------------------- + // 3.0 INITIALIZE VARIABLES + //---------------------------------------------------------------------- + + //--------------------------------- + // First guess microphysics + //--------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxfg[jm] = zqx[jk + klev*jm]; + } + + //--------------------------------- + // Set KLON arrays to zero + //--------------------------------- + + zlicld = (double) 0.0; + zrainaut = (double) 0.0; // currently needed for diags + zrainacc = (double) 0.0; // currently needed for diags + zsnowaut = (double) 0.0; // needed + zldefr = (double) 0.0; + zacust = (double) 0.0; // set later when needed + zqpretot = (double) 0.0; + zlfinalsum = (double) 0.0; + + // Required for first guess call + zlcond1 = (double) 0.0; + zlcond2 = (double) 0.0; + zsupsat = (double) 0.0; + zlevapl = (double) 0.0; + zlevapi = (double) 0.0; + + //------------------------------------- + // solvers for cloud fraction + //------------------------------------- + zsolab = (double) 0.0; + zsolac = (double) 0.0; + + zicetot = (double) 0.0; + + //------------------------------------------ + // reset matrix so missing pathways are set + //------------------------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsolqb[jn + 5*jm] = (double) 0.0; + zsolqa[jn + 5*jm] = (double) 0.0; + } + } + + //---------------------------------- + // reset new microphysics variables + //---------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zfallsrce[jm] = (double) 0.0; + zfallsink[jm] = (double) 0.0; + zconvsrce[jm] = (double) 0.0; + zconvsink[jm] = (double) 0.0; + zpsupsatsrce[jm] = (double) 0.0; + zratio[jm] = (double) 0.0; + } + + + //------------------------- + // derived variables needed + //------------------------- + + zdp = paph[jl + klon*(1 + jk + (klev + 1)*(ibl))] - paph[jl + + klon*(jk + (klev + 1)*(ibl))]; // dp + zgdp = rg / zdp; // g/dp + zrho = pap[jl + klon*(jk + klev*(ibl))] / (rd*ztp1[jk]); // p/RT air density + + zdtgdp = ptsphy*zgdp; // dt g/dp + zrdtgdp = zdp*((double) 1.0 / (ptsphy*rg)); // 1/(dt g/dp) + + if (jk + 1 > 1) { + zdtgdpf = ptsphy*rg / (pap[jl + klon*(jk + klev*(ibl))] - pap[jl + + klon*(-1 + jk + klev*(ibl))]); + } + + //------------------------------------ + // Calculate dqs/dT correction factor + //------------------------------------ + // Reminder: RETV=RV/RD-1 + + // liquid + zfacw = r5les / (cl::sycl::pow((ztp1[jk] - r4les), 2.0)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeeliqt[jk]); + zdqsliqdt = zfacw*zcor*zqsliq[jk]; + zcorqsliq = (double) 1.0 + ralvdcp*zdqsliqdt; + + // ice + zfaci = r5ies / (cl::sycl::pow((ztp1[jk] - r4ies), 2.0)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeew[jk]); + zdqsicedt = zfaci*zcor*zqsice[jk]; + zcorqsice = (double) 1.0 + ralsdcp*zdqsicedt; + + // diagnostic mixed + zalfaw = zfoealfa[jk]; + zalfawm = zalfaw; + zfac = zalfaw*zfacw + ((double) 1.0 - zalfaw)*zfaci; + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeewmt[jk]); + zdqsmixdt = zfac*zcor*zqsmix[jk]; + zcorqsmix = (double) 1.0 + ((double)((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*ralvdcp + (1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*ralsdcp))*zdqsmixdt; + + // evaporation/sublimation limits + zevaplimmix = + cl::sycl::fmax((zqsmix[jk] - zqx[jk + klev*(4)]) / zcorqsmix, (double) 0.0); + zevaplimliq = + cl::sycl::fmax((zqsliq[jk] - zqx[jk + klev*(4)]) / zcorqsliq, (double) 0.0); + zevaplimice = + cl::sycl::fmax((zqsice[jk] - zqx[jk + klev*(4)]) / zcorqsice, (double) 0.0); + + //-------------------------------- + // in-cloud consensate amount + //-------------------------------- + ztmpa = (double) 1.0 / cl::sycl::fmax(za[jk], zepsec); + zliqcld = zqx[jk + klev*(0)]*ztmpa; + zicecld = zqx[jk + klev*(1)]*ztmpa; + zlicld = zliqcld + zicecld; + + + //------------------------------------------------ + // Evaporate very small amounts of liquid and ice + //------------------------------------------------ + + if (zqx[jk + klev*(0)] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(0)] = zqx[jk + klev*(0)]; + zsolqa[0 + 5*(4)] = -zqx[jk + klev*(0)]; + } + + if (zqx[jk + klev*(1)] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(1)] = zqx[jk + klev*(1)]; + zsolqa[1 + 5*(4)] = -zqx[jk + klev*(1)]; + } + + + //--------------------------------------------------------------------- + // 3.1 ICE SUPERSATURATION ADJUSTMENT + //--------------------------------------------------------------------- + // Note that the supersaturation adjustment is made with respect to + // liquid saturation: when T>0C + // ice saturation: when T<0C + // with an adjustment made to allow for ice + // supersaturation in the clear sky + // Note also that the KOOP factor automatically clips the supersaturation + // to a maximum set by the liquid water saturation mixing ratio + // important for temperatures near to but below 0C + //----------------------------------------------------------------------- + + //DIR$ NOFUSION + + //----------------------------------- + // 3.1.1 Supersaturation limit (from Koop) + //----------------------------------- + // Needs to be set for all temperatures + zfokoop = ((double)(cl::sycl::fmin(rkoop1 - rkoop2*ztp1[jk], (double)(r2es*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))*1.0/(double)(r2es*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies)))))); + + if (ztp1[jk] >= rtt || (*yrecldp).nssopt == 0) { + zfac = (double) 1.0; + zfaci = (double) 1.0; + } else { + zfac = za[jk] + zfokoop*((double) 1.0 - za[jk]); + zfaci = ptsphy / (*yrecldp).rkooptau; + } + + //------------------------------------------------------------------- + // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + // correction factor + // [#Note: QSICE or QSLIQ] + //------------------------------------------------------------------- + + // Calculate supersaturation to add to cloud + if (za[jk] > (double) 1.0 - (*yrecldp).ramin) { + zsupsat = + cl::sycl::fmax((zqx[jk + klev*(4)] - zfac*zqsice[jk]) / zcorqsice, (double) 0.0); + } else { + // Calculate environmental humidity supersaturation + zqp1env = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / cl::sycl::fmax((double) 1.0 - + za[jk], zepsilon); + //& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat = cl::sycl::fmax(((double) 1.0 - za[jk])*(zqp1env - zfac*zqsice[jk]) / zcorqsice, + (double) 0.0); + } + + //------------------------------------------------------------------- + // Here the supersaturation is turned into liquid water + // However, if the temperature is below the threshold for homogeneous + // freezing then the supersaturation is turned instantly to ice. + //-------------------------------------------------------------------- + + if (zsupsat > zepsec) { + + if (ztp1[jk] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zsupsat; + // Include liquid in first guess + zqxfg[0] = zqxfg[0] + zsupsat; + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zsupsat; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + zsupsat; + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk])*zfaci; + + } + + //------------------------------------------------------- + // 3.1.3 Include supersaturation from previous timestep + // (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + //------------------------------------------------------- + if (psupsat[jl + klon*(jk + klev*(ibl))] > zepsec) { + if (ztp1[jk] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(0)] = + zsolqa[0 + 5*(0)] + psupsat[jl + klon*(jk + klev*(ibl))]; + zpsupsatsrce[0] = psupsat[jl + klon*(jk + klev*(ibl))]; + // Add liquid to first guess for deposition term + zqxfg[0] = zqxfg[0] + psupsat[jl + klon*(jk + klev*(ibl))]; + // Store cloud budget diagnostics if required + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(1)] = + zsolqa[1 + 5*(1)] + psupsat[jl + klon*(jk + klev*(ibl))]; + zpsupsatsrce[1] = psupsat[jl + klon*(jk + klev*(ibl))]; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + psupsat[jl + klon*(jk + klev*(ibl))]; + // Store cloud budget diagnostics if required + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk])*zfaci; + // Store cloud budget diagnostics if required + } + + // on JL + + //--------------------------------------------------------------------- + // 3.2 DETRAINMENT FROM CONVECTION + //--------------------------------------------------------------------- + // * Diagnostic T-ice/liq split retained for convection + // Note: This link is now flexible and a future convection + // scheme can detrain explicit seperate budgets of: + // cloud water, ice, rain and snow + // * There is no (1-ZA) multiplier term on the cloud detrainment + // term, since is now written in mass-flux terms + // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + //--------------------------------------------------------------------- + if (jk + 1 < klev && jk + 1 >= (*yrecldp).ncldtop) { + + + plude[jl + klon*(jk + klev*(ibl))] = + plude[jl + klon*(jk + klev*(ibl))]*zdtgdp; + + if (/*ldcum[jl + klon*(ibl)] &&*/ plude[jl + klon*(jk + klev*(ibl + ))] > (*yrecldp).rlmin && plu[jl + klon*(1 + jk + klev*(ibl))] > + zepsec) { + + zsolac = zsolac + plude[jl + klon*(jk + klev*(ibl))] / plu[jl + + klon*(1 + jk + klev*(ibl))]; + // *diagnostic temperature split* + zalfaw = zfoealfa[jk]; + zconvsrce[0] = zalfaw*plude[jl + klon*(jk + klev*(ibl))]; + zconvsrce[1] = + ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*(ibl))]; + zsolqa[0 + 5*(0)] = zsolqa[0 + 5*(0)] + zconvsrce[0]; + zsolqa[1 + 5*(1)] = zsolqa[1 + 5*(1)] + zconvsrce[1]; + + } else { + + plude[jl + klon*(jk + klev*(ibl))] = (double) 0.0; + + } + // *convective snow detrainment source + //if (ldcum[jl + klon*(ibl)]) { + zsolqa[3 + 5*(3)] = zsolqa[3 + 5*(3)] + psnde[jl + + klon*(jk + klev*(ibl))]*zdtgdp; + //} + + + } + // JK (*yrecldp).ncldtop) { + + zmf = cl::sycl::fmax((double) 0.0, (pmfu[jl + klon*(jk + klev*(ibl))] + pmfd[-1 + + jl + klon*(jk + klev*(ibl))])*zdtgdp); + zacust = zmf*zanewm1; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlcust[jm] = zmf*zqxnm1[jm]; + // record total flux for enthalpy budget: + zconvsrce[jm] = zconvsrce[jm] + zlcust[jm]; + } + } + + // Now have to work out how much liquid evaporates at arrival point + // since there is no prognostic memory for in-cloud humidity, i.e. + // we always assume cloud is saturated. + + zdtdp = zrdcp*(double) 0.5*(ztp1[-1 + jk] + ztp1[jk]) / paph[jl + klon*(jk + + (klev + 1)*(ibl))]; + zdtforc = zdtdp*(pap[jl + klon*(jk + klev*(ibl))] - pap[jl + + klon*(-1 + jk + klev*(ibl))]); + //[#Note: Diagnostic mixed phase should be replaced below] + zdqs = zanewm1*zdtforc*zdqsmixdt; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlfinal = cl::sycl::fmax((double) 0.0, zlcust[jm] - zdqs); //lim to zero + // no supersaturation allowed incloud ---V + zevap = cl::sycl::fmin((zlcust[jm] - zlfinal), zevaplimmix); + // ZEVAP=0.0_JPRB + zlfinal = zlcust[jm] - zevap; + zlfinalsum = zlfinalsum + zlfinal; // sum + + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zlcust[jm]; // whole sum + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zevap; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zevap; + } + } + + // Reset the cloud contribution if no cloud water survives to this level: + if (zlfinalsum < zepsec) { + zacust = (double) 0.0; + } + zsolac = zsolac + zacust; + + } + // on JK>NCLDTOP + + //--------------------------------------------------------------------- + // Subsidence sink of cloud to the layer below + // (Implicit - re. CFL limit on convective mass flux) + //--------------------------------------------------------------------- + + + if (jk + 1 < klev) { + + zmfdn = cl::sycl::fmax((double) 0.0, (pmfu[jl + klon*(1 + jk + klev*(ibl))] + + pmfd[jl + klon*(1 + jk + klev*(ibl))])*zdtgdp); + + zsolab = zsolab + zmfdn; + zsolqb[0 + 5*(0)] = zsolqb[0 + 5*(0)] + zmfdn; + zsolqb[1 + 5*(1)] = zsolqb[1 + 5*(1)] + zmfdn; + + // Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[0] = zmfdn; + zconvsink[1] = zmfdn; + + } + + + //---------------------------------------------------------------------- + // 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + //---------------------------------------------------------------------- + // NOTE: In default tiedtke scheme this process decreases the cloud + // area but leaves the specific cloud water content + // within clouds unchanged + //---------------------------------------------------------------------- + + // ------------------------------ + // Define turbulent erosion rate + // ------------------------------ + zldifdt = (*yrecldp).rcldiff*ptsphy; //original version + //Increase by factor of 5 for convective points + if (ktype[jl + klon*(ibl)] > 0 && plude[jl + klon*(jk + klev*( + ibl))] > zepsec) { + zldifdt = (*yrecldp).rcldiff_convi*zldifdt; + } + + // At the moment, works on mixed RH profile and partitioned ice/liq fraction + // so that it is similar to previous scheme + // Should apply RHw for liquid cloud and RHi for ice cloud separately + if (zli[jk] > zepsec) { + // Calculate environmental humidity + // ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + // & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + // ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt*cl::sycl::fmax(zqsmix[jk] - zqx[jk + klev*(4)], (double) 0.0); + zleros = za[jk]*ze; + zleros = cl::sycl::fmin(zleros, zevaplimmix); + zleros = cl::sycl::fmin(zleros, zli[jk]); + zaeros = zleros / zlicld; //if linear term + + // Erosion is -ve LINEAR in L,A + zsolac = zsolac - zaeros; //linear + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jk]*zleros; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jk]*zleros; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jk]*zleros; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jk]*zleros; + + } + + //---------------------------------------------------------------------- + // 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + //---------------------------------------------------------------------- + // calculate dqs/dt + // Note: For the separate prognostic Qi and Ql, one would ideally use + // Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + // forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + // These would then instantaneous freeze if T<-38C or lead to ice growth + // by deposition in warmer mixed phase clouds. However, since we do + // not have a separate prognostic equation for in-cloud humidity or a + // statistical scheme approach in place, the depositional growth of ice + // in the mixed phase can not be modelled and we resort to supersaturation + // wrt ice instanteously converting to ice over one timestep + // (see Tompkins et al. QJRMS 2007 for details) + // Thus for the initial implementation the diagnostic mixed phase is + // retained for the moment, and the level of approximation noted. + //---------------------------------------------------------------------- + + zdtdp = zrdcp*ztp1[jk] / pap[jl + klon*(jk + klev*(ibl))]; + zdpmxdt = zdp*zqtmst; + zmfdn = (double) 0.0; + if (jk + 1 < klev) { + zmfdn = pmfu[jl + klon*(1 + jk + klev*(ibl))] + pmfd[jl + klon*(1 + + jk + klev*(ibl))]; + } + zwtot = pvervel[jl + klon*(jk + klev*(ibl))] + (double) 0.5*rg*(pmfu[ + jl + klon*(jk + klev*(ibl))] + pmfd[jl + klon*(jk + klev*(ibl))] + + zmfdn); + zwtot = cl::sycl::fmin(zdpmxdt, cl::sycl::fmax(-zdpmxdt, zwtot)); + zzzdt = phrsw[jl + klon*(jk + klev*(ibl))] + phrlw[jl + klon*(jk + + klev*(ibl))]; + zdtdiab = cl::sycl::fmin(zdpmxdt*zdtdp, cl::sycl::fmax(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ralfdcp*zldefr; + // Note: ZLDEFR should be set to the difference between the mixed phase functions + // in the convection and cloud scheme, but this is not calculated, so is zero and + // the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab; + zqold = zqsmix[jk]; + ztold = ztp1[jk]; + ztp1[jk] = ztp1[jk] + zdtforc; + ztp1[jk] = cl::sycl::fmax(ztp1[jk], (double) 160.0); + llflag = true; + + // Formerly a call to CUADJTQ(..., ICALL=5) + zqp = (double) 1.0 / pap[jl + klon*(jk + klev*(ibl))]; + zqsat = ((double)(r2es*((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)) + (1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies)))))*zqp; + zqsat = cl::sycl::fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond = (zqsmix[jk] - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*r5alvcp)*(1.0/cl::sycl::pow(ztp1[jk] - r4les, 2.0)) + ((1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*r5alscp)*(1.0/cl::sycl::pow(ztp1[jk] - r4ies, 2.0))))); + ztp1[jk] = ztp1[jk] + ((double)((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*ralvdcp + (1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*ralsdcp))*zcond; + zqsmix[jk] = zqsmix[jk] - zcond; + zqsat = ((double)(r2es*((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)) + (1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies)))))*zqp; + zqsat = cl::sycl::fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond1 = (zqsmix[jk] - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*r5alvcp)*(1.0/cl::sycl::pow(ztp1[jk] - r4les, 2.0)) + ((1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*r5alscp)*(1.0/cl::sycl::pow(ztp1[jk] - r4ies, 2.0))))); + ztp1[jk] = ztp1[jk] + ((double)((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*ralvdcp + (1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*ralsdcp))*zcond1; + zqsmix[jk] = zqsmix[jk] - zcond1; + + zdqs = zqsmix[jk] - zqold; + zqsmix[jk] = zqold; + ztp1[jk] = ztold; + + //---------------------------------------------------------------------- + // 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + // ---------------------------------------------------------------------- + // Erosion term is LINEAR in L + // Changed to be uniform distribution in cloud region + + + // Previous function based on DELTA DISTRIBUTION in cloud: + if (zdqs > (double) 0.0) { + // If subsidence evaporation term is turned off, then need to use updated + // liquid and cloud here? + // ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jk]*cl::sycl::fmin(zdqs, zlicld); + zlevap = cl::sycl::fmin(zlevap, zevaplimmix); + zlevap = cl::sycl::fmin(zlevap, cl::sycl::fmax(zqsmix[jk] - zqx[jk + klev*(4)], (double) 0.0)); + + // For first guess call + zlevapl = zliqfrac[jk]*zlevap; + zlevapi = zicefrac[jk]*zlevap; + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jk]*zlevap; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jk]*zlevap; + + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jk]*zlevap; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jk]*zlevap; + + } + + + //---------------------------------------------------------------------- + // 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + //---------------------------------------------------------------------- + // (1) Increase of cloud water in existing clouds + if (za[jk] > zepsec && zdqs <= -(*yrecldp).rlmin) { + + zlcond1 = cl::sycl::fmax(-zdqs, (double) 0.0); //new limiter + + //old limiter (significantly improves upper tropospheric humidity rms) + if (za[jk] > (double) 0.99) { + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsmix[jk]); + zcdmax = (zqx[jk + klev*(4)] - zqsmix[jk]) / ((double) 1.0 + + zcor*zqsmix[jk]*((double)(((double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0)))*r5alvcp)*(1.0/cl::sycl::pow(ztp1[jk] - r4les, 2.0)) + ((1.0 - (double)(cl::sycl::fmin(1.0, cl::sycl::pow((cl::sycl::fmax(rtice, cl::sycl::fmin(rtwat, ztp1[jk])) - rtice)*rtwat_rtice_r, 2.0))))*r5alscp)*(1.0/cl::sycl::pow(ztp1[jk] - r4ies, 2.0))))); + } else { + zcdmax = (zqx[jk + klev*(4)] - za[jk]*zqsmix[jk]) / za[jk]; + } + zlcond1 = cl::sycl::fmax(cl::sycl::fmin(zlcond1, zcdmax), (double) 0.0); + // end old limiter + + zlcond1 = za[jk]*zlcond1; + if (zlcond1 < (*yrecldp).rlmin) { + zlcond1 = (double) 0.0; + } + + //------------------------------------------------------------------------- + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------- + if (ztp1[jk] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond1; + zqxfg[0] = zqxfg[0] + zlcond1; + } else { + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond1; + zqxfg[1] = zqxfg[1] + zlcond1; + } + } + + // (2) Generation of new clouds (da/dt>0) + + + if (zdqs <= -(*yrecldp).rlmin && za[jk] < (double) 1.0 - zepsec) { + + //--------------------------- + // Critical relative humidity + //--------------------------- + zrhc = (*yrecldp).ramid; + zsigk = pap[jl + klon*(jk + klev*(ibl))] / paph[jl + klon*(klev + + (klev + 1)*(ibl))]; + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + if (zsigk > (double) 0.8) { + zrhc = (*yrecldp).ramid + ((double) 1.0 - (*yrecldp).ramid)*(cl::sycl::pow(((zsigk - + (double) 0.8) / (double) 0.2), 2.0)); + } + + // Commented out for CY37R1 to reduce humidity in high trop and strat + // ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + // ZBOTT=ZTRPAUS(JL)+0.2_JPRB + // IF(ZSIGK < ZBOTT) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + // ENDIF + + //--------------------------- + // Supersaturation options + //--------------------------- + if ((*yrecldp).nssopt == 0) { + // No scheme + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / cl::sycl::fmax(zepsec, (double) 1.0 + - za[jk]); + zqe = cl::sycl::fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 1) { + // Tompkins + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / cl::sycl::fmax(zepsec, (double) 1.0 + - za[jk]); + zqe = cl::sycl::fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 2) { + // Lohmann and Karcher + zqe = zqx[jk + klev*(4)]; + } else if ((*yrecldp).nssopt == 3) { + // Gierens + zqe = zqx[jk + klev*(4)] + zli[jk]; + } + + if (ztp1[jk] >= rtt || (*yrecldp).nssopt == 0) { + // No ice supersaturation allowed + zfac = (double) 1.0; + } else { + // Ice supersaturation + zfac = zfokoop; + } + + if (zqe >= zrhc*zqsice[jk]*zfac && zqe < zqsice[jk]*zfac) { + // note: not **2 on 1-a term if ZQE is used. + // Added correction term ZFAC to numerator 15/03/2010 + zacond = -((double) 1.0 - za[jk])*zfac*zdqs / cl::sycl::fmax((double) + 2.0*(zfac*zqsice[jk] - zqe), zepsec); + + zacond = cl::sycl::fmin(zacond, (double) 1.0 - za[jk]); //PUT THE LIMITER BACK + + // Linear term: + // Added correction term ZFAC 15/03/2010 + zlcond2 = -zfac*zdqs*(double) 0.5*zacond; //mine linear + + // new limiter formulation + zzdl = + (double) 2.0*(zfac*zqsice[jk] - zqe) / cl::sycl::fmax(zepsec, (double) 1.0 - za[jk]); + // Added correction term ZFAC 15/03/2010 + if (zfac*zdqs < -zzdl) { + // ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = (za[jk] - (double) 1.0)*zfac*zdqs - zfac*zqsice[jk] + zqx[jk + + klev*(4)]; + zlcond2 = cl::sycl::fmin(zlcond2, zlcondlim); + } + zlcond2 = cl::sycl::fmax(zlcond2, (double) 0.0); + + if (zlcond2 < (*yrecldp).rlmin || ((double) 1.0 - za[jk]) < zepsec) { + zlcond2 = (double) 0.0; + zacond = (double) 0.0; + } + if (zlcond2 == (double) 0.0) { + zacond = (double) 0.0; + } + + // Large-scale generation is LINEAR in A and LINEAR in L + zsolac = zsolac + zacond; //linear + + //------------------------------------------------------------------------ + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------ + if (ztp1[jk] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond2; + zqxfg[0] = zqxfg[0] + zlcond2; + } else { + // homogeneous freezing + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond2; + zqxfg[1] = zqxfg[1] + zlcond2; + } + + } + } + + //---------------------------------------------------------------------- + // 3.7 Growth of ice by vapour deposition + //---------------------------------------------------------------------- + // Following Rotstayn et al. 2001: + // does not use the ice nuclei number from cloudaer.F90 + // but rather a simple Meyers et al. 1992 form based on the + // supersaturation and assuming clouds are saturated with + // respect to liquid water (well mixed), (or Koop adjustment) + // Growth considered as sink of liquid water if present so + // Bergeron-Findeisen adjustment in autoconversion term no longer needed + //---------------------------------------------------------------------- + + //-------------------------------------------------------- + //- + //- Ice deposition following Rotstayn et al. (2001) + //- (monodisperse ice particle size distribution) + //- + //-------------------------------------------------------- + if (idepice == 1) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[-1 + jk] < (*yrecldp).rcldtopcf && za[jk] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*cl::sycl::exp((double) 12.96*(zvpliq - zvpice) / zvpliq - + (double) 0.639); + + //------------------------------------------------ + // 2.4e-2 is conductivity of air + // 8.8 = 700**1/3 = density of ice to the third + //------------------------------------------------ + zadd = + rlstt*(rlstt / (rv*ztp1[jk]) - (double) 1.0) / ((double) 2.4E-2*ztp1[jk]); + zbdd = rv*ztp1[jk]*pap[jl + klon*(jk + klev*(ibl))] / ((double) + 2.21*zvpice); + zcvds = (double) 7.8*(cl::sycl::pow((zicenuclei / zrho), (double) 0.666))*(zvpliq - + zvpice) / ((double) 8.87*(zadd + zbdd)*zvpice); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = cl::sycl::fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + //------------------ + // new value of ice: + //------------------ + zinew = cl::sycl::pow(((double) 0.666*zcvds*ptsphy + (cl::sycl::pow(zice0, (double) 0.666))), + (double) 1.5); + + //--------------------------- + // grid-mean deposition rate: + //--------------------------- + zdepos = cl::sycl::fmax(za[jk]*(zinew - zice0), (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = cl::sycl::fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = cl::sycl::fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*cl::sycl::fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + + } + + //-------------------------------------------------------- + //- + //- Ice deposition assuming ice PSD + //- + //-------------------------------------------------------- + } else if (idepice == 2) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[-1 + jk] < (*yrecldp).rcldtopcf && za[jk] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*cl::sycl::exp((double) 12.96*(zvpliq - zvpice) / zvpliq - + (double) 0.639); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = cl::sycl::fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + // Particle size distribution + ztcg = (double) 1.0; + zfacx1i = (double) 1.0; + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk] + + pap[jl + klon*(jk + klev*(ibl))]*(*yrecldp).rcl_apb3*(cl::sycl::pow(ztp1[jk], + (double) 3.)); + zcorrfac = cl::sycl::pow(((double) 1.0 / zrho), (double) 0.5); + zcorrfac2 = (cl::sycl::pow((ztp1[jk] / (double) 273.0), (double) 1.5))*((double) 393.0 / + (ztp1[jk] + (double) 120.0)); + + zpr02 = zrho*zice0*(*yrecldp).rcl_const1i / (ztcg*zfacx1i); + + zterm1 = (zvpliq - zvpice)*(cl::sycl::pow(ztp1[jk], (double) 2.0)) + *zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2i*zfacx1i / (zrho*zaplusb*zvpice) + ; + zterm2 = (double) 0.65*(*yrecldp).rcl_const6i*(cl::sycl::pow(zpr02, (*yrecldp) + .rcl_const4i)) + (*yrecldp).rcl_const3i*(cl::sycl::pow(zcorrfac, (double) 0.5)) + *(cl::sycl::pow(zrho, (double) 0.5))*(cl::sycl::pow(zpr02, (*yrecldp).rcl_const5i)) / + (cl::sycl::pow(zcorrfac2, (double) 0.5)); + + zdepos = cl::sycl::fmax(za[jk]*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = cl::sycl::fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = cl::sycl::fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*cl::sycl::fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + } + + } + // on IDEPICE + + //###################################################################### + // 4 *** PRECIPITATION PROCESSES *** + //###################################################################### + + //---------------------------------- + // revise in-cloud consensate amount + //---------------------------------- + ztmpa = (double) 1.0 / cl::sycl::fmax(za[jk], zepsec); + zliqcld = zqxfg[0]*ztmpa; + zicecld = zqxfg[1]*ztmpa; + zlicld = zliqcld + zicecld; + + //---------------------------------------------------------------------- + // 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + // now that rain, snow, graupel species are prognostic + // the precipitation flux can be defined directly level by level + // There is no vertical memory required from the flux variable + //---------------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm] || jm + 1 == 2) { + //------------------------ + // source from layer above + //------------------------ + if (jk + 1 > (*yrecldp).ncldtop) { + zfallsrce[jm] = zpfplsx[jk + (klev + 1)*jm]*zdtgdp; + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zfallsrce[jm]; + zqxfg[jm] = zqxfg[jm] + zfallsrce[jm]; + // use first guess precip----------V + zqpretot = zqpretot + zqxfg[jm]; + } + //------------------------------------------------- + // sink to next layer, constant fall speed + //------------------------------------------------- + // if aerosol effect then override + // note that for T>233K this is the same as above. + if ((*yrecldp).laericesed && jm + 1 == 2) { + zre_ice = pre_ice[jl + klon*(jk + klev*(ibl))]; + // The exponent value is from + // Morrison et al. JAS 2005 Appendix + zvqx[1] = (double) 0.002*(cl::sycl::pow(zre_ice, (double) 1.0)); + } + zfall = zvqx[jm]*zrho; + //------------------------------------------------- + // modified by Heymsfield and Iaquinta JAS 2000 + //------------------------------------------------- + // ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + // &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm] = zdtgdp*zfall; + // Cloud budget diagnostic stored at end as implicit + // jl + } + // LLFALL + } + // jm + + //--------------------------------------------------------------- + // Precip cover overlap using MAX-RAN Overlap + // Since precipitation is now prognostic we must + // 1) apply an arbitrary minimum coverage (0.3) if precip>0 + // 2) abandon the 2-flux clr/cld treatment + // 3) Thus, since we have no memory of the clear sky precip + // fraction, we mimic the previous method by reducing + // ZCOVPTOT(JL), which has the memory, proportionally with + // the precip evaporation rate, taking cloud fraction + // into account + // #3 above leads to much smoother vertical profiles of + // precipitation fraction than the Klein-Jakob scheme which + // monotonically increases precip fraction and then resets + // it to zero in a step function once clear-sky precip reaches + // zero. + //--------------------------------------------------------------- + if (zqpretot > zepsec) { + zcovptot = (double) 1.0 - (((double) 1.0 - zcovptot)*((double) 1.0 - cl::sycl::fmax(za[jk], + za[-1 + jk])) / ((double) 1.0 - cl::sycl::fmin(za[-1 + jk], (double) 1.0 - (double) + 1.E-06))); + zcovptot = cl::sycl::fmax(zcovptot, (*yrecldp).rcovpmin); + zcovpclr = cl::sycl::fmax((double) 0.0, zcovptot - za[jk]); // clear sky proportion + zraincld = zqxfg[2] / zcovptot; + zsnowcld = zqxfg[3] / zcovptot; + zcovpmax = cl::sycl::fmax(zcovptot, zcovpmax); + } else { + zraincld = (double) 0.0; + zsnowcld = (double) 0.0; + zcovptot = (double) 0.0; // no flux - reset cover + zcovpclr = (double) 0.0; // reset clear sky proportion + zcovpmax = (double) 0.0; // reset max cover for ZZRH calc + } + + //---------------------------------------------------------------------- + // 4.3a AUTOCONVERSION TO SNOW + //---------------------------------------------------------------------- + + if (ztp1[jk] <= rtt) { + //----------------------------------------------------- + // Snow Autoconversion rate follow Lin et al. 1983 + //----------------------------------------------------- + if (zicecld > zepsec) { + + zzco = ptsphy*(*yrecldp).rsnowlin1*cl::sycl::exp((*yrecldp).rsnowlin2*(ztp1[jk] - rtt)); + + if ((*yrecldp).laericeauto) { + zlcrit = picrit_aer[jl + klon*(jk + klev*(ibl))]; + // 0.3 = N**0.333 with N=0.027 + zzco = zzco*(cl::sycl::pow(((*yrecldp).rnice / pnice[jl + klon*(jk + klev*( + ibl))]), (double) 0.333)); + } else { + zlcrit = (*yrecldp).rlcritsnow; + } + + zsnowaut = zzco*((double) 1.0 - cl::sycl::exp(-(cl::sycl::pow((zicecld / zlcrit), 2.0)))); + zsolqb[3 + 5*(1)] = zsolqb[3 + 5*(1)] + zsnowaut; + + } + } + + //---------------------------------------------------------------------- + // 4.3b AUTOCONVERSION WARM CLOUDS + // Collection and accretion will require separate treatment + // but for now we keep this simple treatment + //---------------------------------------------------------------------- + + if (zliqcld > zepsec) { + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Sundqvist (1989) + //- + //-------------------------------------------------------- + if (iwarmrain == 1) { + + zzco = (*yrecldp).rkconv*ptsphy; + + if ((*yrecldp).laerliqautolsp) { + zlcrit = plcrit_aer[jl + klon*(jk + klev*(ibl))]; + // 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(cl::sycl::pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*(ibl) + )]), (double) 0.333)); + } else { + // Modify autoconversion threshold dependent on: + // land (polluted, high CCN, smaller droplets, higher threshold) + // sea (clean, low CCN, larger droplets, lower threshold) + if (plsm[jl + klon*(ibl)] > (double) 0.5) { + zlcrit = (*yrecldp).rclcrit_land; // land + } else { + zlcrit = (*yrecldp).rclcrit_sea; // ocean + } + } + + //------------------------------------------------------------------ + // Parameters for cloud collection by rain and snow. + // Note that with new prognostic variable it is now possible + // to REPLACE this with an explicit collection parametrization + //------------------------------------------------------------------ + zprecip = (zpfplsx[jk + (klev + 1)*(3)] + zpfplsx[jk + (klev + 1)*(2) + ]) / cl::sycl::fmax(zepsec, zcovptot); + zcfpr = (double) 1.0 + (*yrecldp).rprc1*cl::sycl::sqrt(cl::sycl::fmax(zprecip, (double) 0.0)); + // ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + // &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if ((*yrecldp).laerliqcoll) { + // 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(cl::sycl::pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*( + ibl))]), (double) 0.333)); + } + + zzco = zzco*zcfpr; + zlcrit = zlcrit / cl::sycl::fmax(zcfpr, zepsec); + + if (zliqcld / zlcrit < (double) 20.0) { + // Security for exp for some compilers + zrainaut = zzco*((double) 1.0 - cl::sycl::exp(-(cl::sycl::pow((zliqcld / zlcrit), 2.0)))); + } else { + zrainaut = zzco; + } + + // rain freezes instantly + if (ztp1[jk] <= rtt) { + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zrainaut; + } else { + zsolqb[2 + 5*(0)] = zsolqb[2 + 5*(0)] + zrainaut; + } + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Khairoutdinov and Kogan (2000) + //- + //-------------------------------------------------------- + } else if (iwarmrain == 2) { + + if (plsm[jl + klon*(ibl)] > (double) 0.5) { + // land + zconst = (*yrecldp).rcl_kk_cloud_num_land; + zlcrit = (*yrecldp).rclcrit_land; + } else { + // ocean + zconst = (*yrecldp).rcl_kk_cloud_num_sea; + zlcrit = (*yrecldp).rclcrit_sea; + } + + if (zliqcld > zlcrit) { + + zrainaut = (double) 1.5*za[jk]*ptsphy*(*yrecldp).rcl_kkaau*(cl::sycl::pow(zliqcld, + (*yrecldp).rcl_kkbauq))*(cl::sycl::pow(zconst, (*yrecldp).rcl_kkbaun)); + + zrainaut = cl::sycl::fmin(zrainaut, zqxfg[0]); + if (zrainaut < zepsec) { + zrainaut = (double) 0.0; + } + + zrainacc = (double) 2.0*za[jk]*ptsphy*(*yrecldp) + .rcl_kkaac*(cl::sycl::pow((zliqcld*zraincld), (*yrecldp).rcl_kkbac)); + + zrainacc = cl::sycl::fmin(zrainacc, zqxfg[0]); + if (zrainacc < zepsec) { + zrainacc = (double) 0.0; + } + + } else { + zrainaut = (double) 0.0; + zrainacc = (double) 0.0; + } + + // If temperature < 0, then autoconversion produces snow rather than rain + // Explicit + if (ztp1[jk] <= rtt) { + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainaut; + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainaut; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainacc; + } else { + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainaut; + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainaut; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainacc; + } + + } + // on IWARMRAIN + + } + // on ZLIQCLD > ZEPSEC + + + //---------------------------------------------------------------------- + // RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + // only active if T<0degC and supercooled liquid water is present + // AND if not Sundquist autoconversion (as this includes riming) + //---------------------------------------------------------------------- + if (iwarmrain > 1) { + + if (ztp1[jk] <= rtt && zliqcld > zepsec) { + + // Fallspeed air density correction + zfallcorr = cl::sycl::pow(((*yrecldp).rdensref / zrho), (double) 0.4); + + //------------------------------------------------------------------ + // Riming of snow by cloud water - implicit in lwc + //------------------------------------------------------------------ + if (zsnowcld > zepsec && zcovptot > (double) 0.01) { + + // Calculate riming term + // Factor of liq water taken out because implicit + zsnowrime = (double) 0.3*zcovptot*ptsphy*(*yrecldp) + .rcl_const7s*zfallcorr*(cl::sycl::pow((zrho*zsnowcld*(*yrecldp).rcl_const1s), + (*yrecldp).rcl_const8s)); + + // Limit snow riming term + zsnowrime = cl::sycl::fmin(zsnowrime, (double) 1.0); + + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zsnowrime; + + } + + //------------------------------------------------------------------ + // Riming of ice by cloud water - implicit in lwc + // NOT YET ACTIVE + //------------------------------------------------------------------ + // IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + // + // ! Calculate riming term + // ! Factor of liq water taken out because implicit + // ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + // & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + // + // ! Limit ice riming term + // ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + // + // ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + // + // ENDIF + } + + } + // on IWARMRAIN > 1 + + + //---------------------------------------------------------------------- + // 4.4a MELTING OF SNOW and ICE + // with new implicit solver this also has to treat snow or ice + // precipitating from the level above... i.e. local ice AND flux. + // in situ ice and snow: could arise from LS advection or warming + // falling ice and snow: arrives by precipitation process + //---------------------------------------------------------------------- + + zicetot = zqxfg[1] + zqxfg[3]; + zmeltmax = (double) 0.0; + + // If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if (zicetot > zepsec && ztp1[jk] > rtt) { + + // Calculate subsaturation + zsubsat = cl::sycl::fmax(zqsice[jk] - zqx[jk + klev*(4)], (double) 0.0); + + // Calculate difference between dry-bulb (ZTP1) and the temperature + // at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + // Melting only occurs if the wet-bulb temperature >0 + // i.e. warming of ice particle due to melting > cooling + // due to evaporation. + ztdmtw0 = ztp1[jk] - rtt - zsubsat*(ztw1 + ztw2*(pap[jl + klon*(jk + + klev*(ibl))] - ztw3) - ztw4*(ztp1[jk] - ztw5)); + // Not implicit yet... + // Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*ztdmtw0) / (*yrecldp).rtaumel); + zmeltmax = cl::sycl::fmax(ztdmtw0*zcons1*zrldcp, (double) 0.0); + } + + // Loop over frozen hydrometeors (ice, snow) + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (iphase[jm] == 2) { + jn = imelt[jm]; + if (zmeltmax > zepsec && zicetot > zepsec) { + // Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm] / zicetot; + zmelt = cl::sycl::fmin(zqxfg[jm], zalfa*zmeltmax); + // needed in first guess + // This implies that zqpretot has to be recalculated below + // since is not conserved here if ice falls and liquid doesn't + zqxfg[jm] = zqxfg[jm] - zmelt; + zqxfg[-1 + jn] = zqxfg[-1 + jn] + zmelt; + zsolqa[-1 + jn + 5*jm] = zsolqa[-1 + jn + 5*jm] + zmelt; + zsolqa[jm + 5*(-1 + jn)] = zsolqa[jm + 5*(-1 + jn)] - zmelt; + } + } + } + + //---------------------------------------------------------------------- + // 4.4b FREEZING of RAIN + //---------------------------------------------------------------------- + + // If rain present + if (zqx[jk + klev*(2)] > zepsec) { + + if (ztp1[jk] <= rtt && ztp1[-1 + jk] > rtt) { + // Base of melting layer/top of refreezing layer so + // store rain/snow fraction for precip type diagnosis + // If mostly rain, then supercooled rain slow to freeze + // otherwise faster to freeze (snow or ice pellets) + zqpretot = cl::sycl::fmax(zqx[jk + klev*(3)] + zqx[jk + klev*(2)], zepsec); + prainfrac_toprfz[jl + klon*(ibl)] = + zqx[jk + klev*(2)] / zqpretot; + if (prainfrac_toprfz[jl + klon*(ibl)] > 0.8) { + llrainliq = true; + } else { + llrainliq = false; + } + } + + // If temperature less than zero + if (ztp1[jk] < rtt) { + + if (prainfrac_toprfz[jl + klon*(ibl)] > 0.8) { + + // Majority of raindrops completely melted + // Refreezing is by slow heterogeneous freezing + + // Slope of rain particle size distribution + zlambda = cl::sycl::pow(((*yrecldp).rcl_fac1 / (zrho*zqx[jk + klev*(2)])), + (*yrecldp).rcl_fac2); + + // Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = (*yrecldp).rcl_fzrab*(ztp1[jk] - rtt); + zfrz = ptsphy*((*yrecldp).rcl_const5r / zrho)*(cl::sycl::exp(ztemp) - (double) 1.) + *(cl::sycl::pow(zlambda, (*yrecldp).rcl_const6r)); + zfrzmax = cl::sycl::fmax(zfrz, (double) 0.0); + + } else { + + // Majority of raindrops only partially melted + // Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*(rtt - ztp1[jk])) / + (*yrecldp).rtaumel); + zfrzmax = cl::sycl::fmax((rtt - ztp1[jk])*zcons1*zrldcp, (double) 0.0); + + } + + if (zfrzmax > zepsec) { + zfrz = cl::sycl::fmin(zqx[jk + klev*(2)], zfrzmax); + zsolqa[3 + 5*(2)] = zsolqa[3 + 5*(2)] + zfrz; + zsolqa[2 + 5*(3)] = zsolqa[2 + 5*(3)] - zfrz; + } + } + + } + + + //---------------------------------------------------------------------- + // 4.4c FREEZING of LIQUID + //---------------------------------------------------------------------- + // not implicit yet... + zfrzmax = cl::sycl::fmax(((*yrecldp).rthomo - ztp1[jk])*zrldcp, (double) 0.0); + + jm = 1; + jn = imelt[-1 + jm]; + if (zfrzmax > zepsec && zqxfg[-1 + jm] > zepsec) { + zfrz = cl::sycl::fmin(zqxfg[-1 + jm], zfrzmax); + zsolqa[-1 + jn + 5*(-1 + jm)] = zsolqa[-1 + jn + 5*(-1 + jm)] + zfrz; + zsolqa[-1 + jm + 5*(-1 + jn)] = zsolqa[-1 + jm + 5*(-1 + jn)] - zfrz; + } + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF RAIN/SNOW + //---------------------------------------------------------------------- + + //---------------------------------------- + // Rain evaporation scheme from Sundquist + //---------------------------------------- + if (ievaprain == 1) { + + // Rain + + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + cl::sycl::fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = cl::sycl::fmin(cl::sycl::fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsliq[jk]) / cl::sycl::fmax(zepsec, (double) 1.0 - + za[jk]); + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = cl::sycl::fmax((double) 0.0, cl::sycl::fmin(zqe, zqsliq[jk])); + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq[jk]; + + if (llo1) { + // note: zpreclr is a rain flux + zpreclr = zqxfg[2]*zcovpclr / copysign(cl::sycl::fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = cl::sycl::sqrt(pap[jl + klon*(jk + klev*(ibl))] / paph[jl + + klon*(klev + (klev + 1)*(ibl))]) / (*yrecldp).rvrfactor*zpreclr / + cl::sycl::fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(double) 0.5*(cl::sycl::pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsliq; + zdpr = zcovpclr*zbeta*(zqsliq[jk] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Evaporate rain + zevap = cl::sycl::fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = cl::sycl::fmax((*yrecldp).rcovpmin, zcovptot - cl::sycl::fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + + //--------------------------------------------------------- + // Rain evaporation scheme based on Abel and Boutle (2013) + //--------------------------------------------------------- + } else if (ievaprain == 2) { + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for rain evaporation + // to avoid cloud formation and saturation of the grid box + //----------------------------------------------------------------------- + // Limit RH for rain evaporation dependent on precipitation fraction + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + cl::sycl::fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = cl::sycl::fmin(cl::sycl::fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + // Critical relative humidity + //ZRHC=RAMID + //ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + //IF(ZSIGK > 0.8_JPRB) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + //ENDIF + //ZZRH = MIN(ZRHC,ZZRH) + + // Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = cl::sycl::fmin((double) 0.8, zzrh); + + zqe = cl::sycl::fmax((double) 0.0, cl::sycl::fmin(zqx[jk + klev*(4)], zqsliq[jk])); + + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq[jk]; + + if (llo1) { + + //------------------------------------------- + // Abel and Boutle (2012) evaporation + //------------------------------------------- + // Calculate local precipitation (kg/kg) + zpreclr = zqxfg[2] / zcovptot; + + // Fallspeed air density correction + zfallcorr = cl::sycl::pow(((*yrecldp).rdensref / zrho), 0.4); + + // Saturation vapour pressure with respect to liquid phase + zesatliq = rv / rd*((double)(r2es*cl::sycl::exp((r3les*(ztp1[jk] - rtt))/(ztp1[jk] - r4les)))); + + // Slope of particle size distribution + zlambda = cl::sycl::pow(((*yrecldp).rcl_fac1 / (zrho*zpreclr)), (*yrecldp).rcl_fac2); // ZPRECLR=kg/kg + + zevap_denom = (*yrecldp).rcl_cdenom1*zesatliq - (*yrecldp) + .rcl_cdenom2*ztp1[jk]*zesatliq + (*yrecldp).rcl_cdenom3*(cl::sycl::pow(ztp1[jk], + (double) 3.))*pap[jl + klon*(jk + klev*(ibl))]; + + // Temperature dependent conductivity + zcorr2 = (cl::sycl::pow((ztp1[jk] / (double) 273.), (double) 1.5))*(double) 393. / + (ztp1[jk] + (double) 120.); + zka = (*yrecldp).rcl_ka273*zcorr2; + + zsubsat = cl::sycl::fmax(zzrh*zqsliq[jk] - zqe, (double) 0.0); + + zbeta = ((double) 0.5 / zqsliq[jk])*(cl::sycl::pow(ztp1[jk], (double) 2.)) + *zesatliq*(*yrecldp).rcl_const1r*(zcorr2 / zevap_denom)*((double) 0.78 / + (cl::sycl::pow(zlambda, (*yrecldp).rcl_const4r)) + (*yrecldp) + .rcl_const2r*(cl::sycl::pow((zrho*zfallcorr), (double) 0.5)) / ((cl::sycl::pow(zcorr2, (double) + 0.5))*(cl::sycl::pow(zlambda, (*yrecldp).rcl_const3r)))); + + zdenom = (double) 1.0 + zbeta*ptsphy; //*ZCORQSLIQ(JL) + zdpevap = zcovpclr*zbeta*ptsphy*zsubsat / zdenom; + + //--------------------------------------------------------- + // Add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Limit rain evaporation + zevap = cl::sycl::fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = cl::sycl::fmax((*yrecldp).rcovpmin, zcovptot - cl::sycl::fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + } + // on IEVAPRAIN + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF SNOW + //---------------------------------------------------------------------- + // Snow + if (ievapsnow == 1) { + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + cl::sycl::fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = cl::sycl::fmin(cl::sycl::fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / cl::sycl::fmax(zepsec, (double) 1.0 - + za[jk]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = cl::sycl::fmax((double) 0.0, cl::sycl::fmin(zqe, zqsice[jk])); + llo1 = zcovpclr > zepsec && zqxfg[3] > zepsec && zqe < zzrh*zqsice[jk]; + + if (llo1) { + // note: zpreclr is a rain flux a + zpreclr = zqxfg[3]*zcovpclr / copysign(cl::sycl::fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = cl::sycl::sqrt(pap[jl + klon*(jk + klev*(ibl))] / paph[jl + + klon*(klev + (klev + 1)*(ibl))]) / (*yrecldp).rvrfactor*zpreclr / + cl::sycl::fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(cl::sycl::pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsice; + zdpr = zcovpclr*zbeta*(zqsice[jk] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce snow to zero and model + // produces small amounts of snowfall everywhere. + //--------------------------------------------------------- + + // Evaporate snow + zevap = cl::sycl::fmin(zdpevap, zqxfg[3]); + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = cl::sycl::fmax((*yrecldp).rcovpmin, zcovptot - cl::sycl::fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqxfg[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + //--------------------------------------------------------- + } else if (ievapsnow == 2) { + + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for snow evaporation + //----------------------------------------------------------------------- + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + cl::sycl::fmax(zepsec, (double) 1.0 - za[jk]); + zzrh = cl::sycl::fmin(cl::sycl::fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jk + klev*(4)] - za[jk]*zqsice[jk]) / cl::sycl::fmax(zepsec, (double) 1.0 - + za[jk]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = cl::sycl::fmax((double) 0.0, cl::sycl::fmin(zqe, zqsice[jk])); + llo1 = + zcovpclr > zepsec && zqx[jk + klev*(3)] > zepsec && zqe < zzrh*zqsice[jk]; + + if (llo1) { + + // Calculate local precipitation (kg/kg) + zpreclr = zqx[jk + klev*(3)] / zcovptot; + zvpice = ((double)(r2es*cl::sycl::exp((r3ies*(ztp1[jk] - rtt))/(ztp1[jk] - r4ies))))*rv / rd; + + // Particle size distribution + // ZTCG increases Ni with colder temperatures - essentially a + // Fletcher or Meyers scheme? + ztcg = (double) 1.0; //v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + // ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = (double) 1.0; //v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk] + + pap[jl + klon*(jk + klev*(ibl))]*(*yrecldp).rcl_apb3*(cl::sycl::pow(ztp1[jk], + 3.0)); + zcorrfac = cl::sycl::pow((1.0 / zrho), 0.5); + zcorrfac2 = (cl::sycl::pow((ztp1[jk] / 273.0), 1.5))*(393.0 / (ztp1[jk] + 120.0)); + + zpr02 = zrho*zpreclr*(*yrecldp).rcl_const1s / (ztcg*zfacx1s); + + zterm1 = (zqsice[jk] - zqe)*(cl::sycl::pow(ztp1[jk], 2.0))*zvpice*zcorrfac2*ztcg*(*yrecldp) + .rcl_const2s*zfacx1s / (zrho*zaplusb*zqsice[jk]); + zterm2 = 0.65*(*yrecldp).rcl_const6s*(cl::sycl::pow(zpr02, (*yrecldp).rcl_const4s)) + + (*yrecldp).rcl_const3s*(cl::sycl::pow(zcorrfac, 0.5))*(cl::sycl::pow(zrho, 0.5))*(cl::sycl::pow(zpr02, + (*yrecldp).rcl_const5s)) / (cl::sycl::pow(zcorrfac2, 0.5)); + + zdpevap = cl::sycl::fmax(zcovpclr*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit evaporation to snow amount + //-------------------------------------------------------------------- + zevap = cl::sycl::fmin(zdpevap, zevaplimice); + zevap = cl::sycl::fmin(zevap, zqx[jk + klev*(3)]); + + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = cl::sycl::fmax((*yrecldp).rcovpmin, zcovptot - cl::sycl::fmax((double) 0.0, (zcovptot - + za[jk])*zevap / zqx[jk + klev*(3)])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + + } + // on IEVAPSNOW + + //-------------------------------------- + // Evaporate small precipitation amounts + //-------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm]) { + if (zqxfg[jm] < (*yrecldp).rlmin) { + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zqxfg[jm]; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zqxfg[jm]; + } + } + } + + //###################################################################### + // 5.0 *** SOLVERS FOR A AND L *** + // now use an implicit solution rather than exact solution + // solver is forward in time, upstream difference for advection + //###################################################################### + + //--------------------------- + // 5.1 solver for cloud cover + //--------------------------- + zanew = (za[jk] + zsolac) / ((double) 1.0 + zsolab); + zanew = cl::sycl::fmin(zanew, (double) 1.0); + if (zanew < (*yrecldp).ramin) { + zanew = (double) 0.0; + } + zda = zanew - zaorig[jk]; + //--------------------------------- + // variables needed for next level + //--------------------------------- + zanewm1 = zanew; + + //-------------------------------- + // 5.2 solver for the microphysics + //-------------------------------- + + //-------------------------------------------------------------- + // Truncate explicit sinks to avoid negatives + // Note: Species are treated in the order in which they run out + // since the clipping will alter the balance for the other vars + //-------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + llindex3[jn + 5*jm] = false; + } + zsinksum[jm] = (double) 0.0; + } + + //---------------------------- + // collect sink terms and mark + //---------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsinksum[jm] = zsinksum[jm] - zsolqa[jm + 5*jn]; // +ve total is bad + } + } + + //--------------------------------------- + // calculate overshoot and scaling factor + //--------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zmax = cl::sycl::fmax(zqx[jk + klev*jm], zepsec); + zrat = cl::sycl::fmax(zsinksum[jm], zmax); + zratio[jm] = zmax / zrat; + } + + //-------------------------------------------- + // scale the sink terms, in the correct order, + // recalculating the scale factor each time + //-------------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zsinksum[jm] = (double) 0.0; + } + + //---------------- + // recalculate sum + //---------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + psum_solqa = 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + psum_solqa = psum_solqa + zsolqa[jm + 5*jn]; + } + // ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm] = zsinksum[jm] - psum_solqa; + //--------------------------- + // recalculate scaling factor + //--------------------------- + zmm = cl::sycl::fmax(zqx[jk + klev*jm], zepsec); + zrr = cl::sycl::fmax(zsinksum[jm], zmm); + zratio[jm] = zmm / zrr; + //------ + // scale + //------ + zzratio = zratio[jm]; + //DIR$ IVDEP + //DIR$ PREFERVECTOR + for (jn = 0; jn <= 5 + -1; jn += 1) { + if (zsolqa[jm + 5*jn] < (double) 0.0) { + zsolqa[jm + 5*jn] = zsolqa[jm + 5*jn]*zzratio; + zsolqa[jn + 5*jm] = zsolqa[jn + 5*jm]*zzratio; + } + } + } + + //-------------------------------------------------------------- + // 5.2.2 Solver + //------------------------ + + //------------------------ + // set the LHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + //---------------------------------------------- + // diagonals: microphysical sink terms+transport + //---------------------------------------------- + if (jn + 1 == jm + 1) { + zqlhs[jn + 5*jm] = (double) 1.0 + zfallsink[jm]; + for (jo = 0; jo <= 5 + -1; jo += 1) { + zqlhs[jn + 5*jm] = zqlhs[jn + 5*jm] + zsolqb[jo + 5*jn]; + } + //------------------------------------------ + // non-diagonals: microphysical source terms + //------------------------------------------ + } else { + zqlhs[jn + 5*jm] = -zsolqb[jn + 5*jm]; // here is the delta T - missing from doc. + } + } + } + + //------------------------ + // set the RHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + //--------------------------------- + // sum the explicit source and sink + //--------------------------------- + zexplicit = (double) 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + zexplicit = zexplicit + zsolqa[jm + 5*jn]; // sum over middle index + } + zqxn[jm] = zqx[jk + klev*jm] + zexplicit; + } + + //----------------------------------- + // *** solve by LU decomposition: *** + //----------------------------------- + + // Note: This fast way of solving NCLVxNCLV system + // assumes a good behaviour (i.e. non-zero diagonal + // terms with comparable orders) of the matrix stored + // in ZQLHS. For the moment this is the case but + // be aware to preserve it when doing eventual + // modifications. + + // Non pivoting recursive factorization + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + // number of steps + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + // row index + zqlhs[jm + 5*jn] = zqlhs[jm + 5*jn] / zqlhs[jn + 5*jn]; + for (ik = jn + 1; ik <= 5 + -1; ik += 1) { + // column index + zqlhs[jm + 5*ik] = zqlhs[jm + 5*ik] - zqlhs[jm + 5*jn]*zqlhs[jn + 5*ik]; + } + } + } + + // Backsubstitution + // step 1 + for (jn = 1; jn <= 5 + -1; jn += 1) { + for (jm = 0; jm <= jn + 1 - 1 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + } + // step 2 + zqxn[4] = zqxn[4] / zqlhs[4 + 5*(4)]; + for (jn = -2 + 5; jn >= 1 + -1; jn += -1) { + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + zqxn[jn] = zqxn[jn] / zqlhs[jn + 5*jn]; + } + + // Ensure no small values (including negatives) remain in cloud variables nor + // precipitation rates. + // Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + if (zqxn[jn] < zepsec) { + zqxn[4] = zqxn[4] + zqxn[jn]; + zqxn[jn] = (double) 0.0; + } + } + + //-------------------------------- + // variables needed for next level + //-------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxnm1[jm] = zqxn[jm]; + zqxn2d[jk + klev*jm] = zqxn[jm]; + } + + //------------------------------------------------------------------------ + // 5.3 Precipitation/sedimentation fluxes to next level + // diagnostic precipitation fluxes + // It is this scaled flux that must be used for source to next layer + //------------------------------------------------------------------------ + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[1 + jk + (klev + 1)*jm] = zfallsink[jm]*zqxn[jm]*zrdtgdp; + } + + // Ensure precipitation fraction is zero if no precipitation + zqpretot = + zpfplsx[1 + jk + (klev + 1)*(3)] + zpfplsx[1 + jk + (klev + 1)*(2)]; + if (zqpretot < zepsec) { + zcovptot = (double) 0.0; + } + + //###################################################################### + // 6 *** UPDATE TENDANCIES *** + //###################################################################### + + //-------------------------------- + // 6.1 Temperature and CLV budgets + //-------------------------------- + + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + + // calculate fluxes in and out of box for conservation of TL + zfluxq[jm] = zpsupsatsrce[jm] + zconvsrce[jm] + zfallsrce[jm] - (zfallsink[jm] + + zconvsink[jm])*zqxn[jm]; + + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = tendency_loc_t[jl + + klon*(jk + klev*(ibl))] + ralvdcp*(zqxn[jm] - zqx[jk + klev*jm] - + zfluxq[jm])*zqtmst; + } + + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*(ibl))] = tendency_loc_t[jl + + klon*(jk + klev*(ibl))] + ralsdcp*(zqxn[jm] - zqx[jk + klev*jm] - + zfluxq[jm])*zqtmst; + } + + //---------------------------------------------------------------------- + // New prognostic tendencies - ice,liquid rain,snow + // Note: CLV arrays use PCLV in calculation of tendency while humidity + // uses ZQX. This is due to clipping at start of cloudsc which + // include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + //---------------------------------------------------------------------- + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))] = + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*(ibl)))] + (zqxn[jm] - + zqx0[jk + klev*jm])*zqtmst; + + } + + //---------------------- + // 6.2 Humidity budget + //---------------------- + tendency_loc_q[jl + klon*(jk + klev*(ibl))] = tendency_loc_q[jl + + klon*(jk + klev*(ibl))] + (zqxn[4] - zqx[jk + klev*(4)])*zqtmst; + + //------------------- + // 6.3 cloud cover + //----------------------- + tendency_loc_a[jl + klon*(jk + klev*(ibl))] = + tendency_loc_a[jl + klon*(jk + klev*(ibl))] + zda*zqtmst; + + //-------------------------------------------------- + // Copy precipitation fraction into output variable + //------------------------------------------------- + pcovptot[jl + klon*(jk + klev*(ibl))] = zcovptot; + + } + // on vertical level JK + //---------------------------------------------------------------------- + // END OF VERTICAL LOOP + //---------------------------------------------------------------------- + + //###################################################################### + // 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + //###################################################################### + + //-------------------------------------------------------------------- + // Copy general precip arrays back into PFP arrays for GRIB archiving + // Add rain and liquid fluxes, ice and snow fluxes + //-------------------------------------------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfplsl[jl + klon*(jk + (klev + 1)*(ibl))] = + zpfplsx[jk + (klev + 1)*(2)] + zpfplsx[jk + (klev + 1)*(0)]; + pfplsn[jl + klon*(jk + (klev + 1)*(ibl))] = + zpfplsx[jk + (klev + 1)*(3)] + zpfplsx[jk + (klev + 1)*(1)]; + } + + //-------- + // Fluxes: + //-------- + pfsqlf[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqif[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqrf[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqsf[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfcqlng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfcqnng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfcqrng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; //rain + pfcqsng[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; //snow + // fluxes due to turbulence + pfsqltur[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + pfsqitur[jl + klon*(0 + (klev + 1)*(ibl))] = (double) 0.0; + + for (jk = 0; jk <= klev + -1; jk += 1) { + + zgdph_r = -zrg_r*(paph[jl + klon*(1 + jk + (klev + 1)*(ibl))] - paph[ + jl + klon*(jk + (klev + 1)*(ibl))])*zqtmst; + pfsqlf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqlf[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqif[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqif[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqrf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqlf[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqsf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqif[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqlng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqlng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqnng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqnng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqrng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqlng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfcqsng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfcqnng[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqltur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqltur[jl + klon*(jk + (klev + 1)*(ibl))]; + pfsqitur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = + pfsqitur[jl + klon*(jk + (klev + 1)*(ibl))]; + + zalfaw = zfoealfa[jk]; + + // Liquid , LS scheme minus detrainment + pfsqlf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqlf[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(0)] - zqx0[jk + klev*(-1 + + 1)] + pvfl[jl + klon*(jk + klev*(ibl))]*ptsphy - zalfaw*plude[ + jl + klon*(jk + klev*(ibl))])*zgdph_r; + // liquid, negative numbers + pfcqlng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqlng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(0)]*zgdph_r; + + // liquid, vertical diffusion + pfsqltur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqltur[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + pvfl[jl + klon*(jk + klev*(ibl + ))]*ptsphy*zgdph_r; + + // Rain, LS scheme + pfsqrf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqrf[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(2)] - zqx0[jk + klev*(-1 + + 3)])*zgdph_r; + // rain, negative numbers + pfcqrng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqrng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(2)]*zgdph_r; + + // Ice , LS scheme minus detrainment + pfsqif[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqif[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(1)] - zqx0[jk + klev*(-1 + + 2)] + pvfi[jl + klon*(jk + klev*(ibl))]*ptsphy - ((double) 1.0 - + zalfaw)*plude[jl + klon*(jk + klev*(ibl))])*zgdph_r; + // ice, negative numbers + pfcqnng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqnng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(1)]*zgdph_r; + + // ice, vertical diffusion + pfsqitur[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqitur[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + pvfi[jl + klon*(jk + klev*(ibl + ))]*ptsphy*zgdph_r; + + // snow, LS scheme + pfsqsf[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfsqsf[jl + klon*(1 + + jk + (klev + 1)*(ibl))] + (zqxn2d[jk + klev*(3)] - zqx0[jk + klev*(-1 + + 4)])*zgdph_r; + // snow, negative numbers + pfcqsng[jl + klon*(1 + jk + (klev + 1)*(ibl))] = pfcqsng[jl + + klon*(1 + jk + (klev + 1)*(ibl))] + zlneg[jk + klev*(3)]*zgdph_r; + } + + //----------------------------------- + // enthalpy flux due to precipitation + //----------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfhpsl[jl + klon*(jk + (klev + 1)*(ibl))] = + -rlvtt*pfplsl[jl + klon*(jk + (klev + 1)*(ibl))]; + pfhpsn[jl + klon*(jk + (klev + 1)*(ibl))] = + -rlstt*pfplsn[jl + klon*(jk + (klev + 1)*(ibl))]; + } +} + diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel b/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel new file mode 100644 index 00000000..159a02f4 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel @@ -0,0 +1,2651 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#include +#include +#include + +void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2, + double * __restrict__ zfoealfa, double * __restrict__ ztp1, double * __restrict__ zli, + double * __restrict__ za, double * __restrict__ zaorig, double * __restrict__ zliqfrac, + double * __restrict__ zicefrac, double * __restrict__ zqx, double * __restrict__ zqx0, + double * __restrict__ zpfplsx, double * __restrict__ zlneg, double * __restrict__ zqxn2d, + double * __restrict__ zqsmix, double * __restrict__ zqsliq, double * __restrict__ zqsice, + double * __restrict__ zfoeewmt, double * __restrict__ zfoeew, double * __restrict__ zfoeeliqt, + cl::sycl:: stream out_stream, cl::sycl::nd_item<1> item_ct) { + + //------------------------------------------------------------------------------- + // Declare input/output arguments + //------------------------------------------------------------------------------- + + // PLCRIT_AER : critical liquid mmr for rain autoconversion process + // PICRIT_AER : critical liquid mmr for snow autoconversion process + // PRE_LIQ : liq Re + // PRE_ICE : ice Re + // PCCN : liquid cloud condensation nuclei + // PNICE : ice number concentration (cf. CCN) + + const int klev = 137; // Number of levels + + double zlcond1, zlcond2, zlevapl, zlevapi, zrainaut, zsnowaut, zliqcld, zicecld; + double zlevap, zleros; + // condensation and evaporation terms + // autoconversion terms + double zfokoop; + //double zfoealfa[klev + 1]; + double zicenuclei; // number concentration of ice nuclei + + double zlicld; + double zacond; + double zaeros; + double zlfinalsum; + double zdqs; + double ztold; + double zqold; + double zdtgdp; + double zrdtgdp; + double ztrpaus; + double zcovpclr; + double zpreclr; + double zcovptot; + double zcovpmax; + double zqpretot; + double zdpevap; + double zdtforc; + double zdtdiab; + //double ztp1[klev]; + double zldefr; + double zldifdt; + double zdtgdpf; + double zlcust[5]; + double zacust; + double zmf; + + double zrho; + double ztmp1, ztmp2, ztmp3; + double ztmp4, ztmp5, ztmp6, ztmp7; + double zalfawm; + + // Accumulators of A,B,and C factors for cloud equations + double zsolab; // -ve implicit CC + double zsolac; // linear CC + double zanew; + double zanewm1; + + double zgdp; + + //---for flux calculation + double zda; + //double zli[klev], za[klev]; + //double zaorig[klev]; // start of scheme value for CC + + int llflag; + int llo1; + + int icall, ik, jk, jl, jm, jn, jo, jlen, is; + + double zdp, zpaphd; + + double zalfa; + // & ZALFACU, ZALFALS + double zalfaw; + double zbeta, zbeta1; + //REAL(KIND=JPRB) :: ZBOTT + double zcfpr; + double zcor; + double zcdmax; + double zmin; + double zlcondlim; + double zdenom; + double zdpmxdt; + double zdpr; + double zdtdp; + double ze; + double zepsec; + double zfac, zfaci, zfacw; + double zgdcp; + double zinew; + double zlcrit; + double zmfdn; + double zprecip; + double zqe; + double zqsat, zqtmst, zrdcp; + double zrhc, zsig, zsigk; + double zwtot; + double zzco, zzdl, zzrh, zzzdt, zqadj; + double zqnew, ztnew; + double zrg_r, zgdph_r, zcons1, zcond, zcons1a; + double zlfinal; + double zmelt; + double zevap; + double zfrz; + double zvpliq, zvpice; + double zadd, zbdd, zcvds, zice0, zdepos; + double zsupsat; + double zfall; + double zre_ice; + double zrldcp; + double zqp1env; + + //---------------------------- + // Arrays for new microphysics + //---------------------------- + int iphase[5]; // marker for water phase of each species + // 0=vapour, 1=liquid, 2=ice + + int imelt[5]; // marks melting linkage for ice categories + // ice->liquid, snow->rain + + int llfall[5]; // marks falling species + // LLFALL=0, cloud cover must > 0 for zqx > 0 + // LLFALL=1, no cloud needed, zqx can evaporate + + int llindex1[5]; // index variable + int llindex3[5 * 5]; // index variable + double zmax; + double zrat; + int iorder[5]; // array for sorting explicit terms + + //double zliqfrac[klev]; // cloud liquid water fraction: ql/(ql+qi) + //double zicefrac[klev]; // cloud ice water fraction: qi/(ql+qi) + double zqxn[5]; // new values for zqx at time+1 + double zqxfg[5]; // first guess values including precip + double zqxnm1[5]; // new values for zqx at time+1 at level above + double zfluxq[5]; // fluxes convergence of species (needed?) + // Keep the following for possible future total water variance scheme? + //REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + //REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + //REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + //REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + //REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + double zmeltmax; + double zfrzmax; + double zicetot; + + + //double zqsmix[klev]; // diagnostic mixed phase saturation + //REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + //double zqsliq[klev]; // liquid water saturation + //double zqsice[klev]; // ice water saturation + + //REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + //REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + //REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + //double zfoeewmt[klev]; + //double zfoeew[klev]; + //double zfoeeliqt[klev]; + //REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + double zdqsliqdt, zdqsicedt, zdqsmixdt; + double zcorqsliq; + double zcorqsice; + //REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + double zcorqsmix; + double zevaplimliq, zevaplimice, zevaplimmix; + + //------------------------------------------------------- + // SOURCE/SINK array for implicit and explicit terms + //------------------------------------------------------- + // a POSITIVE value entered into the arrays is a... + // Source of this variable + // | + // | Sink of this variable + // | | + // V V + // ZSOLQA(JL,IQa,IQb) = explicit terms + // ZSOLQB(JL,IQa,IQb) = implicit terms + // Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + // a source of NCLDQL and a sink of IQV + // put 'magic' source terms such as PLUDE from + // detrainment into explicit source/sink array diagnognal + // ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + // i.e. A positive value is a sink!????? weird... + //------------------------------------------------------- + + double zsolqa[5 * 5]; // explicit sources and sinks + double zsolqb[5 * 5]; // implicit sources and sinks + // e.g. microphysical pathways between ice variables. + double zqlhs[5 * 5]; // n x n matrix storing the LHS of implicit solver + double zvqx[5]; // fall speeds of three categories + double zexplicit; + double zratio[5], zsinksum[5]; + + // for sedimentation source/sink terms + double zfallsink[5]; + double zfallsrce[5]; + + // for convection detrainment source and subsidence source/sink terms + double zconvsrce[5]; + double zconvsink[5]; + + // for supersaturation source term from previous timestep + double zpsupsatsrce[5]; + + // Numerical fit to wet bulb temperature + double ztw1 = (double) 1329.31; + double ztw2 = (double) 0.0074615; + double ztw3 = (double) 0.85E5; + double ztw4 = (double) 40.637; + double ztw5 = (double) 275.0; + + double zsubsat; // Subsaturation for snow melting term + double ztdmtw0; // Diff between dry-bulb temperature and + // temperature when wet-bulb = 0degC + + // Variables for deposition term + double ztcg; // Temperature dependent function for ice PSD + double zfacx1i, zfacx1s; // PSD correction factor + double zaplusb, zcorrfac, zcorrfac2, zpr02, zterm1, zterm2; // for ice dep + double zcldtopdist; // Distance from cloud top + double zinfactor; // No. of ice nuclei factor for deposition + + // Autoconversion/accretion/riming/evaporation + int iwarmrain; + int ievaprain; + int ievapsnow; + int idepice; + double zrainacc; + double zraincld; + double zsnowrime; + double zsnowcld; + double zesatliq; + double zfallcorr; + double zlambda; + double zevap_denom; + double zcorr2; + double zka; + double zconst; + double ztemp; + + // Rain freezing + int llrainliq; // True if majority of raindrops are liquid (no ice core) + + //---------------------------- + // End: new microphysics + //---------------------------- + + //---------------------- + // SCM budget statistics + //---------------------- + double zrain; + + double zhook_handle; + double ztmpl, ztmpi, ztmpa; + + double zmm, zrr; + double zrg; + + double zzsum, zzratio; + double zepsilon; + + double zcond1, zqp; + + double psum_solqa; + int ibl; + int i_llfall_0; + //double zqx[5 * klev]; + //double zqx0[5 * klev]; + //double zpfplsx[5 * (klev + 1)]; + //double zlneg[5 * klev]; + //double zqxn2d[5 * klev]; + + jl = item_ct.get_local_id(0); //threadIdx.x; + ibl = item_ct.get_group(0); // or 2? blockIdx.z; + + //=============================================================================== + //IF (LHOOK) CALL DR_HOOK('CLOUDSC',0,ZHOOK_HANDLE) + + //=============================================================================== + // 0.0 Beginning of timestep book-keeping + //---------------------------------------------------------------------- + + + //###################################################################### + // 0. *** SET UP CONSTANTS *** + //###################################################################### + + zepsilon = (double) 100.*std::numeric_limits::epsilon(); //DBL_EPSILON; + + // --------------------------------------------------------------------- + // Set version of warm-rain autoconversion/accretion + // IWARMRAIN = 1 ! Sundquist + // IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + // --------------------------------------------------------------------- + iwarmrain = 2; + // --------------------------------------------------------------------- + // Set version of rain evaporation + // IEVAPRAIN = 1 ! Sundquist + // IEVAPRAIN = 2 ! Abel and Boutle (2013) + // --------------------------------------------------------------------- + ievaprain = 2; + // --------------------------------------------------------------------- + // Set version of snow evaporation + // IEVAPSNOW = 1 ! Sundquist + // IEVAPSNOW = 2 ! New + // --------------------------------------------------------------------- + ievapsnow = 1; + // --------------------------------------------------------------------- + // Set version of ice deposition + // IDEPICE = 1 ! Rotstayn (2001) + // IDEPICE = 2 ! New + // --------------------------------------------------------------------- + idepice = 1; + + // --------------------- + // Some simple constants + // --------------------- + zqtmst = (double) 1.0 / ptsphy; + zgdcp = rg / rcpd; + zrdcp = rd / rcpd; + zcons1a = rcpd / (rlmlt*rg*(*yrecldp).rtaumel); + zepsec = (double) 1.E-14; + zrg_r = (double) 1.0 / rg; + zrldcp = (double) 1.0 / (ralsdcp - ralvdcp); + + // Note: Defined in module/yoecldp.F90 + // NCLDQL=1 ! liquid cloud water + // NCLDQI=2 ! ice cloud water + // NCLDQR=3 ! rain water + // NCLDQS=4 ! snow + // NCLDQV=5 ! vapour + + // ----------------------------------------------- + // Define species phase, 0=vapour, 1=liquid, 2=ice + // ----------------------------------------------- + iphase[4] = 0; + iphase[0] = 1; + iphase[2] = 1; + iphase[1] = 2; + iphase[3] = 2; + + // --------------------------------------------------- + // Set up melting/freezing index, + // if an ice category melts/freezes, where does it go? + // --------------------------------------------------- + imelt[4] = -99; + imelt[0] = 2; + imelt[2] = 4; + imelt[1] = 3; + imelt[3] = 3; + + // ----------------------------------------------- + // INITIALIZATION OF OUTPUT TENDENCIES + // ----------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_a[jl + klon*(jk + klev*ibl)] = (double) 0.0; + } + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; + } + } + + //-- These were uninitialized : meaningful only when we compare error differences + for (jk = 0; jk <= klev + -1; jk += 1) { + pcovptot[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_cld[jl + klon*(jk + klev*(4 + 5*ibl))] = (double) 0.0; + } + + // ------------------------- + // set up fall speeds in m/s + // ------------------------- + zvqx[4] = (double) 0.0; + zvqx[0] = (double) 0.0; + zvqx[1] = (*yrecldp).rvice; + zvqx[2] = (*yrecldp).rvrain; + zvqx[3] = (*yrecldp).rvsnow; + for (i_llfall_0 = 0; i_llfall_0 <= 5 + -1; i_llfall_0 += 1) { + llfall[i_llfall_0] = false; + } + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (zvqx[jm] > (double) 0.0) { + llfall[jm] = true; + } + // falling species + } + // Set LLFALL to false for ice (but ice still sediments!) + // Need to rationalise this at some point + llfall[1] = false; + + + //###################################################################### + // 1. *** INITIAL VALUES FOR VARIABLES *** + //###################################################################### + + + // ---------------------- + // non CLV initialization + // ---------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + ztp1[jl + klon*(jk + klev*ibl)] = + pt[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_t[jl + klon*(jk + klev*ibl)]; + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = + pq[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + zqx0[jl + klon*(jk + klev*(4 + 5*ibl))] = + pq[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + za[jl + klon*(jk + klev*ibl)] = + pa[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + zaorig[jl + klon*(jk + klev*ibl)] = + pa[jl + klon*(jk + klev*ibl)] + ptsphy*tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + } + + // ------------------------------------- + // initialization for CLV family + // ------------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqx[jl + klon*(jk + klev*(jm + 5*ibl))] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqx0[jl + klon*(jk + klev*(jm + 5*ibl))] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + + ptsphy*tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + } + } + + //------------- + // zero arrays + //------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + zpfplsx[jl + klon*(jk + (klev + 1)*(jm + 5*ibl))] = (double) 0.0; // precip fluxes + } + } + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + zqxn2d[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; // end of timestep values in 2D + zlneg[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; // negative input check + } + } + + prainfrac_toprfz[jl + klon*ibl] = (double) 0.0; // rain fraction at top of refreezing layer + llrainliq = true; // Assume all raindrops are liquid initially + + // ---------------------------------------------------- + // Tidy up very small cloud cover or total cloud water + // ---------------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + if (zqx[jl + klon*(jk + klev*(0 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))] < (*yrecldp).rlmin || + za[jl + klon*(jk + klev*ibl)] < (*yrecldp).ramin) { + + // Evaporate small cloud liquid water amounts + zlneg[jl + klon*(jk + klev*(0 + 5*ibl))] = zlneg[jl + klon*(jk + klev*(0 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + zqadj = zqx[jl + klon*(jk + klev*(0 + 5*ibl))]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + zqx[jl + klon*(jk + klev*(0 + 5*ibl))] = (double) 0.0; + + // Evaporate small cloud ice water amounts + zlneg[jl + klon*(jk + klev*(1 + 5*ibl))] = zlneg[jl + klon*(jk + klev*(1 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + zqadj = zqx[jl + klon*(jk + klev*(1 + 5*ibl))]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + zqx[jl + klon*(jk + klev*(1 + 5*ibl))] = (double) 0.0; + + // Set cloud cover to zero + za[jl + klon*(jk + klev*ibl)] = (double) 0.0; + + } + } + + // --------------------------------- + // Tidy up small CLV variables + // --------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + if (zqx[jl + klon*(jk + klev*(jm + 5*ibl))] < (*yrecldp).rlmin) { + zlneg[jl + klon*(jk + klev*(jm + 5*ibl))] = zlneg[jl + klon*(jk + klev*(jm + 5*ibl))] + + zqx[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqadj = zqx[jl + klon*(jk + klev*(jm + 5*ibl))]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + } + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + } + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] = zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqx[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; + } + } + } + + + // ------------------------------ + // Define saturation values + // ------------------------------ + for (jk = 0; jk <= klev + -1; jk += 1) { + //---------------------------------------- + // old *diagnostic* mixed phase saturation + //---------------------------------------- + zfoealfa[jl + klon*(jk + (klev + 1)*ibl)] = + ((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))); + zfoeewmt[jl + klon*(jk + klev*ibl)] = + fmin(((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))) / + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsmix[jl + klon*(jk + klev*ibl)] = zfoeewmt[jl + klon*(jk + klev*ibl)]; + zqsmix[jl + klon*(jk + klev*ibl)] = zqsmix[jl + klon*(jk + klev*ibl)] / + ((double) 1.0 - retv*zqsmix[jl + klon*(jk + klev*ibl)]); + + //--------------------------------------------- + // ice saturation T<273K + // liquid water saturation for T>273K + //--------------------------------------------- + zalfa = ((double)(fmax(0.0, copysign(1.0, ztp1[jl + klon*(jk + klev*ibl)] - rtt)))); + zfoeew[jl + klon*(jk + klev*ibl)] = fmin((zalfa*((double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))) + ((double) 1.0 - zalfa)* + ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))/ + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zfoeew[jl + klon*(jk + klev*ibl)] = fmin((double) 0.5, zfoeew[jl + klon*(jk + klev*ibl)]); + zqsice[jl + klon*(jk + klev*ibl)] = zfoeew[jl + klon*(jk + klev*ibl)] / + ((double) 1.0 - retv*zfoeew[jl + klon*(jk + klev*ibl)]); + + //---------------------------------- + // liquid water saturation + //---------------------------------- + zfoeeliqt[jl + klon*(jk + klev*ibl)] = fmin(((double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))) / pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsliq[jl + klon*(jk + klev*ibl)] = zfoeeliqt[jl + klon*(jk + klev*ibl)]; + zqsliq[jl + klon*(jk + klev*ibl)] = zqsliq[jl + klon*(jk + klev*ibl)] / + ((double) 1.0 - retv*zqsliq[jl + klon*(jk + klev*ibl)]); + + // !---------------------------------- + // ! ice water saturation + // !---------------------------------- + // ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + // ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + // ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-RETV*ZQSICE(JL,JK)) + + } + + for (jk = 0; jk <= klev + -1; jk += 1) { + + + //------------------------------------------ + // Ensure cloud fraction is between 0 and 1 + //------------------------------------------ + za[jl + klon*(jk + klev*ibl)] = + fmax((double) 0.0, fmin((double) 1.0, za[jl + klon*(jk + klev*ibl)])); + + //------------------------------------------------------------------- + // Calculate liq/ice fractions (no longer a diagnostic relationship) + //------------------------------------------------------------------- + zli[jl + klon*(jk + klev*ibl)] = zqx[jl + klon*(jk + klev*(0 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + if (zli[jl + klon*(jk + klev*ibl)] > (*yrecldp).rlmin) { + zliqfrac[jl + klon*(jk + klev*ibl)] = + zqx[jl + klon*(jk + klev*(0 + 5*ibl))] / zli[jl + klon*(jk + klev*ibl)]; + zicefrac[jl + klon*(jk + klev*ibl)] = + (double) 1.0 - zliqfrac[jl + klon*(jk + klev*ibl)]; + } else { + zliqfrac[jl + klon*(jk + klev*ibl)] = (double) 0.0; + zicefrac[jl + klon*(jk + klev*ibl)] = (double) 0.0; + } + } + + //###################################################################### + // 2. *** CONSTANTS AND PARAMETERS *** + //###################################################################### + // Calculate L in updrafts of bl-clouds + // Specify QS, P/PS for tropopause (for c2) + // And initialize variables + //------------------------------------------ + + //--------------------------------- + // Find tropopause level (ZTRPAUS) + //--------------------------------- + ztrpaus = (double) 0.1; + zpaphd = (double) 1.0 / paph[jl + klon*(klev + (klev + 1)*ibl)]; + for (jk = 0; jk <= klev - 1 + -1; jk += 1) { + zsig = pap[jl + klon*(jk + klev*ibl)]*zpaphd; + if (zsig > (double) 0.1 && zsig < (double) 0.4 && ztp1[jl + klon*(jk + klev*ibl)] > + ztp1[jl + klon*(1 + jk + klev*ibl)]) { + ztrpaus = zsig; + } + } + + //----------------------------- + // Reset single level variables + //----------------------------- + + zanewm1 = (double) 0.0; + zda = (double) 0.0; + zcovpclr = (double) 0.0; + zcovpmax = (double) 0.0; + zcovptot = (double) 0.0; + zcldtopdist = (double) 0.0; + + //###################################################################### + // 3. *** PHYSICS *** + //###################################################################### + + + //---------------------------------------------------------------------- + // START OF VERTICAL LOOP + //---------------------------------------------------------------------- + + for (jk = -1 + (*yrecldp).ncldtop; jk <= klev + -1; jk += 1) { + + //---------------------------------------------------------------------- + // 3.0 INITIALIZE VARIABLES + //---------------------------------------------------------------------- + + //--------------------------------- + // First guess microphysics + //--------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxfg[jm] = zqx[jl + klon*(jk + klev*(jm + 5*ibl))]; + } + + //--------------------------------- + // Set KLON arrays to zero + //--------------------------------- + + zlicld = (double) 0.0; + zrainaut = (double) 0.0; // currently needed for diags + zrainacc = (double) 0.0; // currently needed for diags + zsnowaut = (double) 0.0; // needed + zldefr = (double) 0.0; + zacust = (double) 0.0; // set later when needed + zqpretot = (double) 0.0; + zlfinalsum = (double) 0.0; + + // Required for first guess call + zlcond1 = (double) 0.0; + zlcond2 = (double) 0.0; + zsupsat = (double) 0.0; + zlevapl = (double) 0.0; + zlevapi = (double) 0.0; + + //------------------------------------- + // solvers for cloud fraction + //------------------------------------- + zsolab = (double) 0.0; + zsolac = (double) 0.0; + + zicetot = (double) 0.0; + + //------------------------------------------ + // reset matrix so missing pathways are set + //------------------------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsolqb[jn + 5*jm] = (double) 0.0; + zsolqa[jn + 5*jm] = (double) 0.0; + } + } + + //---------------------------------- + // reset new microphysics variables + //---------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zfallsrce[jm] = (double) 0.0; + zfallsink[jm] = (double) 0.0; + zconvsrce[jm] = (double) 0.0; + zconvsink[jm] = (double) 0.0; + zpsupsatsrce[jm] = (double) 0.0; + zratio[jm] = (double) 0.0; + } + + + //------------------------- + // derived variables needed + //------------------------- + + zdp = paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - + paph[jl + klon*(jk + (klev + 1)*ibl)]; // dp + zgdp = rg / zdp; // g/dp + zrho = pap[jl + klon*(jk + klev*ibl)] / (rd*ztp1[jl + klon*(jk + klev*ibl)]); // p/RT air density + + zdtgdp = ptsphy*zgdp; // dt g/dp + zrdtgdp = zdp*((double) 1.0 / (ptsphy*rg)); // 1/(dt g/dp) + + if (jk + 1 > 1) { + zdtgdpf = ptsphy*rg / (pap[jl + klon*(jk + klev*ibl)] - + pap[jl + klon*(-1 + jk + klev*ibl)]); + } + + //------------------------------------ + // Calculate dqs/dT correction factor + //------------------------------------ + // Reminder: RETV=RV/RD-1 + + // liquid + zfacw = r5les / (pow((ztp1[jl + klon*(jk + klev*ibl)] - r4les), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeeliqt[jl + klon*(jk + klev*ibl)]); + zdqsliqdt = zfacw*zcor*zqsliq[jl + klon*(jk + klev*ibl)]; + zcorqsliq = (double) 1.0 + ralvdcp*zdqsliqdt; + + // ice + zfaci = r5ies / (pow((ztp1[jl + klon*(jk + klev*ibl)] - r4ies), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeew[jl + klon*(jk + klev*ibl)]); + zdqsicedt = zfaci*zcor*zqsice[jl + klon*(jk + klev*ibl)]; + zcorqsice = (double) 1.0 + ralsdcp*zdqsicedt; + + // diagnostic mixed + zalfaw = zfoealfa[jl + klon*(jk + (klev + 1)*ibl)]; + zalfawm = zalfaw; + zfac = zalfaw*zfacw + ((double) 1.0 - zalfaw)*zfaci; + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeewmt[jl + klon*(jk + klev*ibl)]); + zdqsmixdt = zfac*zcor*zqsmix[jl + klon*(jk + klev*ibl)]; + zcorqsmix = (double) 1.0 + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)* + rtwat_rtice_r, 2)))*ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)* + rtwat_rtice_r, 2))))*ralsdcp))*zdqsmixdt; + + // evaporation/sublimation limits + zevaplimmix = fmax((zqsmix[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]) / zcorqsmix, (double) 0.0); + zevaplimliq = fmax((zqsliq[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]) / zcorqsliq, (double) 0.0); + zevaplimice = fmax((zqsice[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]) / zcorqsice, (double) 0.0); + + //-------------------------------- + // in-cloud consensate amount + //-------------------------------- + ztmpa = (double) 1.0 / fmax(za[jl + klon*(jk + klev*ibl)], zepsec); + zliqcld = zqx[jl + klon*(jk + klev*(0 + 5*ibl))]*ztmpa; + zicecld = zqx[jl + klon*(jk + klev*(1 + 5*ibl))]*ztmpa; + zlicld = zliqcld + zicecld; + + + //------------------------------------------------ + // Evaporate very small amounts of liquid and ice + //------------------------------------------------ + + if (zqx[jl + klon*(jk + klev*(0 + 5*ibl))] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(0)] = zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + zsolqa[0 + 5*(4)] = -zqx[jl + klon*(jk + klev*(0 + 5*ibl))]; + } + + if (zqx[jl + klon*(jk + klev*(1 + 5*ibl))] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(1)] = zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + zsolqa[1 + 5*(4)] = -zqx[jl + klon*(jk + klev*(1 + 5*ibl))]; + } + + + //--------------------------------------------------------------------- + // 3.1 ICE SUPERSATURATION ADJUSTMENT + //--------------------------------------------------------------------- + // Note that the supersaturation adjustment is made with respect to + // liquid saturation: when T>0C + // ice saturation: when T<0C + // with an adjustment made to allow for ice + // supersaturation in the clear sky + // Note also that the KOOP factor automatically clips the supersaturation + // to a maximum set by the liquid water saturation mixing ratio + // important for temperatures near to but below 0C + //----------------------------------------------------------------------- + + //----------------------------------- + // 3.1.1 Supersaturation limit (from Koop) + //----------------------------------- + // Needs to be set for all temperatures + zfokoop = ((double)(fmin(rkoop1 - rkoop2*ztp1[jl + klon*(jk + klev*ibl)], (double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))*1.0/(double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))); + + if (ztp1[jl + klon*(jk + klev*ibl)] >= rtt || (*yrecldp).nssopt == 0) { + zfac = (double) 1.0; + zfaci = (double) 1.0; + } else { + zfac = za[jl + klon*(jk + klev*ibl)] + zfokoop*((double) 1.0 - za[jl + klon*(jk +klev*ibl)]); + zfaci = ptsphy / (*yrecldp).rkooptau; + } + + //------------------------------------------------------------------- + // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + // correction factor + // [#Note: QSICE or QSLIQ] + //------------------------------------------------------------------- + + // Calculate supersaturation to add to cloud + if (za[jl + klon*(jk + klev*ibl)] > (double) 1.0 - (*yrecldp).ramin) { + zsupsat = fmax((zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - zfac*zqsice[jl + klon*(jk + klev*ibl)]) / + zcorqsice, (double) 0.0); + } else { + // Calculate environmental humidity supersaturation + zqp1env = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - + za[jl + klon*(jk + klev*ibl)]*zqsice[jl + klon*(jk + klev*ibl)]) / + fmax((double) 1.0 - za[jl + klon*(jk + klev*ibl)], zepsilon); + //& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat = fmax(((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*(zqp1env - zfac*zqsice[jl + klon*(jk + klev*ibl)]) / + zcorqsice, (double) 0.0); + } + + //------------------------------------------------------------------- + // Here the supersaturation is turned into liquid water + // However, if the temperature is below the threshold for homogeneous + // freezing then the supersaturation is turned instantly to ice. + //-------------------------------------------------------------------- + + if (zsupsat > zepsec) { + + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zsupsat; + // Include liquid in first guess + zqxfg[0] = zqxfg[0] + zsupsat; + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zsupsat; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + zsupsat; + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*zfaci; + + } + + //------------------------------------------------------- + // 3.1.3 Include supersaturation from previous timestep + // (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + //------------------------------------------------------- + if (psupsat[jl + klon*(jk + klev*ibl)] > zepsec) { + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(0)] = zsolqa[0 + 5*(0)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[0] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add liquid to first guess for deposition term + zqxfg[0] = zqxfg[0] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(1)] = zsolqa[1 + 5*(1)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[1] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*zfaci; + // Store cloud budget diagnostics if required + } + + // on JL + + //--------------------------------------------------------------------- + // 3.2 DETRAINMENT FROM CONVECTION + //--------------------------------------------------------------------- + // * Diagnostic T-ice/liq split retained for convection + // Note: This link is now flexible and a future convection + // scheme can detrain explicit seperate budgets of: + // cloud water, ice, rain and snow + // * There is no (1-ZA) multiplier term on the cloud detrainment + // term, since is now written in mass-flux terms + // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + //--------------------------------------------------------------------- + if (jk + 1 < klev && jk + 1 >= (*yrecldp).ncldtop) { + + + plude[jl + klon*(jk + klev*ibl)] = plude[jl + klon*(jk + klev*ibl)]*zdtgdp; + + if (/*ldcum[jl + klon*ibl] &&*/ plude[jl + klon*(jk + klev*ibl)] > (*yrecldp).rlmin + && plu[jl + klon*(1 + jk + klev*ibl)] > zepsec) { + + zsolac = zsolac + plude[jl + klon*(jk + klev*ibl)] / + plu[jl + klon*(1 + jk + klev*ibl)]; + // *diagnostic temperature split* + zalfaw = zfoealfa[jl + klon*(jk + (klev + 1)*ibl)]; + zconvsrce[0] = zalfaw*plude[jl + klon*(jk + klev*ibl)]; + zconvsrce[1] = ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)]; + zsolqa[0 + 5*(0)] = zsolqa[0 + 5*(0)] + zconvsrce[0]; + zsolqa[1 + 5*(1)] = zsolqa[1 + 5*(1)] + zconvsrce[1]; + + } else { + + plude[jl + klon*(jk + klev*ibl)] = (double) 0.0; + + } + // *convective snow detrainment source + //if (ldcum[jl + klon*ibl]) { + zsolqa[3 + 5*(3)] = zsolqa[3 + 5*(3)] + psnde[jl + klon*(jk + klev*ibl)]*zdtgdp; + //} + + + } + // JK (*yrecldp).ncldtop) { + + zmf = fmax((double) 0.0, (pmfu[jl + klon*(jk + klev*ibl)] + pmfd[jl + klon*(jk + klev*ibl)])*zdtgdp); + zacust = zmf*zanewm1; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlcust[jm] = zmf*zqxnm1[jm]; + // record total flux for enthalpy budget: + zconvsrce[jm] = zconvsrce[jm] + zlcust[jm]; + } + } + + // Now have to work out how much liquid evaporates at arrival point + // since there is no prognostic memory for in-cloud humidity, i.e. + // we always assume cloud is saturated. + + zdtdp = zrdcp*(double) 0.5*(ztp1[jl + klon*(-1 + jk + klev*ibl)] + + ztp1[jl + klon*(jk + klev*ibl)]) / paph[jl + klon*(jk + (klev + 1)*ibl)]; + zdtforc = zdtdp*(pap[jl + klon*(jk + klev*ibl)] - pap[jl + klon*(-1 + jk + klev*ibl)]); + //[#Note: Diagnostic mixed phase should be replaced below] + zdqs = zanewm1*zdtforc*zdqsmixdt; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlfinal = fmax((double) 0.0, zlcust[jm] - zdqs); //lim to zero + // no supersaturation allowed incloud ---V + zevap = fmin((zlcust[jm] - zlfinal), zevaplimmix); + // ZEVAP=0.0_JPRB + zlfinal = zlcust[jm] - zevap; + zlfinalsum = zlfinalsum + zlfinal; // sum + + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zlcust[jm]; // whole sum + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zevap; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zevap; + } + } + + // Reset the cloud contribution if no cloud water survives to this level: + if (zlfinalsum < zepsec) { + zacust = (double) 0.0; + } + zsolac = zsolac + zacust; + + } + // on JK>NCLDTOP + + //--------------------------------------------------------------------- + // Subsidence sink of cloud to the layer below + // (Implicit - re. CFL limit on convective mass flux) + //--------------------------------------------------------------------- + + + if (jk + 1 < klev) { + + zmfdn = fmax((double) 0.0, (pmfu[jl + klon*(1 + jk + klev*ibl)] + + pmfd[jl + klon*(1 + jk + klev*ibl)])*zdtgdp); + + zsolab = zsolab + zmfdn; + zsolqb[0 + 5*(0)] = zsolqb[0 + 5*(0)] + zmfdn; + zsolqb[1 + 5*(1)] = zsolqb[1 + 5*(1)] + zmfdn; + + // Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[0] = zmfdn; + zconvsink[1] = zmfdn; + + } + + + //---------------------------------------------------------------------- + // 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + //---------------------------------------------------------------------- + // NOTE: In default tiedtke scheme this process decreases the cloud + // area but leaves the specific cloud water content + // within clouds unchanged + //---------------------------------------------------------------------- + + // ------------------------------ + // Define turbulent erosion rate + // ------------------------------ + zldifdt = (*yrecldp).rcldiff*ptsphy; //original version + //Increase by factor of 5 for convective points + if (ktype[jl + klon*ibl] > 0 && plude[jl + klon*(jk + klev*ibl)] > zepsec) { + zldifdt = (*yrecldp).rcldiff_convi*zldifdt; + } + + // At the moment, works on mixed RH profile and partitioned ice/liq fraction + // so that it is similar to previous scheme + // Should apply RHw for liquid cloud and RHi for ice cloud separately + if (zli[jl + klon*(jk + klev*ibl)] > zepsec) { + // Calculate environmental humidity + // ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + // & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + // ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt*fmax(zqsmix[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))], (double) 0.0); + zleros = za[jl + klon*(jk + klev*ibl)]*ze; + zleros = fmin(zleros, zevaplimmix); + zleros = fmin(zleros, zli[jl + klon*(jk + klev*ibl)]); + zaeros = zleros / zlicld; //if linear term + + // Erosion is -ve LINEAR in L,A + zsolac = zsolac - zaeros; //linear + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jl + klon*(jk + klev*ibl)]*zleros; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jl + klon*(jk + klev*ibl)]*zleros; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jl + klon*(jk + klev*ibl)]*zleros; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jl + klon*(jk + klev*ibl)]*zleros; + + } + + //---------------------------------------------------------------------- + // 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + //---------------------------------------------------------------------- + // calculate dqs/dt + // Note: For the separate prognostic Qi and Ql, one would ideally use + // Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + // forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + // These would then instantaneous freeze if T<-38C or lead to ice growth + // by deposition in warmer mixed phase clouds. However, since we do + // not have a separate prognostic equation for in-cloud humidity or a + // statistical scheme approach in place, the depositional growth of ice + // in the mixed phase can not be modelled and we resort to supersaturation + // wrt ice instanteously converting to ice over one timestep + // (see Tompkins et al. QJRMS 2007 for details) + // Thus for the initial implementation the diagnostic mixed phase is + // retained for the moment, and the level of approximation noted. + //---------------------------------------------------------------------- + + zdtdp = zrdcp*ztp1[jl + klon*(jk + klev*ibl)] / pap[jl + klon*(jk + klev*ibl)]; + zdpmxdt = zdp*zqtmst; + zmfdn = (double) 0.0; + if (jk + 1 < klev) { + zmfdn = pmfu[jl + klon*(1 + jk + klev*ibl)] + pmfd[jl + klon*(1 + jk + klev*ibl)]; + } + zwtot = pvervel[jl + klon*(jk + klev*ibl)] + + (double) 0.5*rg*(pmfu[jl + klon*(jk + klev*ibl)] + pmfd[jl + klon*(jk + klev*ibl)] + zmfdn); + zwtot = fmin(zdpmxdt, fmax(-zdpmxdt, zwtot)); + zzzdt = phrsw[jl + klon*(jk + klev*ibl)] + phrlw[jl + klon*(jk + klev*ibl)]; + zdtdiab = fmin(zdpmxdt*zdtdp, fmax(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ralfdcp*zldefr; + // Note: ZLDEFR should be set to the difference between the mixed phase functions + // in the convection and cloud scheme, but this is not calculated, so is zero and + // the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab; + zqold = zqsmix[jl + klon*(jk + klev*ibl)]; + ztold = ztp1[jl + klon*(jk + klev*ibl)]; + ztp1[jl + klon*(jk + klev*ibl)] = ztp1[jl + klon*(jk + klev*ibl)] + zdtforc; + ztp1[jl + klon*(jk + klev*ibl)] = fmax(ztp1[jl + klon*(jk + klev*ibl)], (double) 160.0); + llflag = true; + + // Formerly a call to CUADJTQ(..., ICALL=5) + zqp = (double) 1.0 / pap[jl + klon*(jk + klev*ibl)]; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond = (zqsmix[jl + klon*(jk + klev*ibl)] - zqsat) / + ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - + rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4les, 2)) + + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4ies, 2))))); + ztp1[jl + klon*(jk + klev*ibl)] = ztp1[jl + klon*(jk + klev*ibl)] + + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond; + zqsmix[jl + klon*(jk + klev*ibl)] = zqsmix[jl + klon*(jk + klev*ibl)] - zcond; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond1 = (zqsmix[jl + klon*(jk + klev*ibl)] - zqsat) / + ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - + rtice)*rtwat_rtice_r, 2)))*r5alvcp)*(1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4les, 2)) + + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4ies, 2))))); + ztp1[jl + klon*(jk + klev*ibl)] = ztp1[jl + klon*(jk + klev*ibl)] + + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond1; + zqsmix[jl + klon*(jk + klev*ibl)] = zqsmix[jl + klon*(jk + klev*ibl)] - zcond1; + + zdqs = zqsmix[jl + klon*(jk + klev*ibl)] - zqold; + zqsmix[jl + klon*(jk + klev*ibl)] = zqold; + ztp1[jl + klon*(jk + klev*ibl)] = ztold; + + //---------------------------------------------------------------------- + // 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + // ---------------------------------------------------------------------- + // Erosion term is LINEAR in L + // Changed to be uniform distribution in cloud region + + + // Previous function based on DELTA DISTRIBUTION in cloud: + if (zdqs > (double) 0.0) { + // If subsidence evaporation term is turned off, then need to use updated + // liquid and cloud here? + // ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jl + klon*(jk + klev*ibl)]*fmin(zdqs, zlicld); + zlevap = fmin(zlevap, zevaplimmix); + zlevap = fmin(zlevap, fmax(zqsmix[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))], (double) 0.0)); + + // For first guess call + zlevapl = zliqfrac[jl + klon*(jk + klev*ibl)]*zlevap; + zlevapi = zicefrac[jl + klon*(jk + klev*ibl)]*zlevap; + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac[jl + klon*(jk + klev*ibl)]*zlevap; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac[jl + klon*(jk + klev*ibl)]*zlevap; + + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac[jl + klon*(jk + klev*ibl)]*zlevap; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac[jl + klon*(jk + klev*ibl)]*zlevap; + + } + + + //---------------------------------------------------------------------- + // 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + //---------------------------------------------------------------------- + // (1) Increase of cloud water in existing clouds + if (za[jl + klon*(jk + klev*ibl)] > zepsec && zdqs <= -(*yrecldp).rlmin) { + + zlcond1 = fmax(-zdqs, (double) 0.0); //new limiter + + //old limiter (significantly improves upper tropospheric humidity rms) + if (za[jl + klon*(jk + klev*ibl)] > (double) 0.99) { + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsmix[jl + klon*(jk + klev*ibl)]); + zcdmax = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - zqsmix[jl + klon*(jk + klev*ibl)]) / + ((double) 1.0 + zcor*zqsmix[jl + klon*(jk + klev*ibl)]*((double)(((double) + (fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4les, 2)) + + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jl + klon*(jk + klev*ibl)])) - rtice)*rtwat_rtice_r, 2)))) + *r5alscp)*(1.0/pow(ztp1[jl + klon*(jk + klev*ibl)] - r4ies, 2))))); + } else { + zcdmax = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]*zqsmix[jl + klon*(jk + klev*ibl)]) / + za[jl + klon*(jk + klev*ibl)]; + } + zlcond1 = fmax(fmin(zlcond1, zcdmax), (double) 0.0); + // end old limiter + + zlcond1 = za[jl + klon*(jk + klev*ibl)]*zlcond1; + if (zlcond1 < (*yrecldp).rlmin) { + zlcond1 = (double) 0.0; + } + + //------------------------------------------------------------------------- + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------- + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond1; + zqxfg[0] = zqxfg[0] + zlcond1; + } else { + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond1; + zqxfg[1] = zqxfg[1] + zlcond1; + } + } + + // (2) Generation of new clouds (da/dt>0) + + + if (zdqs <= -(*yrecldp).rlmin && za[jl + klon*(jk + klev*ibl)] < (double) 1.0 - zepsec) { + + //--------------------------- + // Critical relative humidity + //--------------------------- + zrhc = (*yrecldp).ramid; + zsigk = pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]; + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + if (zsigk > (double) 0.8) { + zrhc = (*yrecldp).ramid + ((double) 1.0 - (*yrecldp).ramid)*(pow(((zsigk - (double) 0.8) / + (double) 0.2), 2)); + } + + // Commented out for CY37R1 to reduce humidity in high trop and strat + // ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + // ZBOTT=ZTRPAUS(JL)+0.2_JPRB + // IF(ZSIGK < ZBOTT) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + // ENDIF + + //--------------------------- + // Supersaturation options + //--------------------------- + if ((*yrecldp).nssopt == 0) { + // No scheme + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 1) { + // Tompkins + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - + za[jl + klon*(jk + klev*ibl)]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 2) { + // Lohmann and Karcher + zqe = zqx[jl + klon*(jk + klev*(4 + 5*ibl))]; + } else if ((*yrecldp).nssopt == 3) { + // Gierens + zqe = + zqx[jl + klon*(jk + klev*(4 + 5*ibl))] + zli[jl + klon*(jk + klev*ibl)]; + } + + if (ztp1[jl + klon*(jk + klev*ibl)] >= rtt || (*yrecldp).nssopt == 0) { + // No ice supersaturation allowed + zfac = (double) 1.0; + } else { + // Ice supersaturation + zfac = zfokoop; + } + + if (zqe >= zrhc*zqsice[jl + klon*(jk + klev*ibl)]*zfac && + zqe < zqsice[jl + klon*(jk + klev*ibl)]*zfac) { + // note: not **2 on 1-a term if ZQE is used. + // Added correction term ZFAC to numerator 15/03/2010 + zacond = -((double) 1.0 - za[jl + klon*(jk + klev*ibl)])*zfac*zdqs / + fmax((double) 2.0*(zfac*zqsice[jl + klon*(jk + klev*ibl)] - zqe), zepsec); + + zacond = fmin(zacond, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); //PUT THE LIMITER BACK + + // Linear term: + // Added correction term ZFAC 15/03/2010 + zlcond2 = -zfac*zdqs*(double) 0.5*zacond; //mine linear + + // new limiter formulation + zzdl = (double) 2.0*(zfac*zqsice[jl + klon*(jk + klev*ibl)] - zqe) / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + // Added correction term ZFAC 15/03/2010 + if (zfac*zdqs < -zzdl) { + // ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = (za[jl + klon*(jk + klev*ibl)] - (double) 1.0)*zfac*zdqs - zfac* + zqsice[jl + klon*(jk + klev*ibl)] + zqx[jl + klon*(jk + klev*(4 + 5*ibl))]; + zlcond2 = fmin(zlcond2, zlcondlim); + } + zlcond2 = fmax(zlcond2, (double) 0.0); + + if (zlcond2 < (*yrecldp).rlmin || ((double) 1.0 - + za[jl + klon*(jk + klev*ibl)]) < zepsec) { + zlcond2 = (double) 0.0; + zacond = (double) 0.0; + } + if (zlcond2 == (double) 0.0) { + zacond = (double) 0.0; + } + + // Large-scale generation is LINEAR in A and LINEAR in L + zsolac = zsolac + zacond; //linear + + //------------------------------------------------------------------------ + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------ + if (ztp1[jl + klon*(jk + klev*ibl)] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond2; + zqxfg[0] = zqxfg[0] + zlcond2; + } else { + // homogeneous freezing + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond2; + zqxfg[1] = zqxfg[1] + zlcond2; + } + + } + } + + //---------------------------------------------------------------------- + // 3.7 Growth of ice by vapour deposition + //---------------------------------------------------------------------- + // Following Rotstayn et al. 2001: + // does not use the ice nuclei number from cloudaer.F90 + // but rather a simple Meyers et al. 1992 form based on the + // supersaturation and assuming clouds are saturated with + // respect to liquid water (well mixed), (or Koop adjustment) + // Growth considered as sink of liquid water if present so + // Bergeron-Findeisen adjustment in autoconversion term no longer needed + //---------------------------------------------------------------------- + + //-------------------------------------------------------- + //- + //- Ice deposition following Rotstayn et al. (2001) + //- (monodisperse ice particle size distribution) + //- + //-------------------------------------------------------- + if (idepice == 1) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[jl + klon*(-1 + jk + klev*ibl)] < (*yrecldp).rcldtopcf && + za[jl + klon*(jk + klev*ibl)] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jl + klon*(jk + klev*ibl)] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq - (double) 0.639); + + //------------------------------------------------ + // 2.4e-2 is conductivity of air + // 8.8 = 700**1/3 = density of ice to the third + //------------------------------------------------ + zadd = rlstt*(rlstt / (rv*ztp1[jl + klon*(jk + klev*ibl)]) - (double) 1.0) / + ((double) 2.4E-2*ztp1[jl + klon*(jk + klev*ibl)]); + zbdd = rv*ztp1[jl + klon*(jk + klev*ibl)]*pap[jl + klon*(jk + klev*ibl)] / + ((double) 2.21*zvpice); + zcvds = (double) 7.8*(pow((zicenuclei / zrho), (double) 0.666))*(zvpliq - + zvpice) / ((double) 8.87*(zadd + zbdd)*zvpice); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + //------------------ + // new value of ice: + //------------------ + zinew = pow(((double) 0.666*zcvds*ptsphy + (pow(zice0, (double) 0.666))), (double) 1.5); + + //--------------------------- + // grid-mean deposition rate: + //--------------------------- + zdepos = fmax(za[jl + klon*(jk + klev*ibl)]*(zinew - zice0), (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp).rdepliqrefrate + + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + + } + + //-------------------------------------------------------- + //- + //- Ice deposition assuming ice PSD + //- + //-------------------------------------------------------- + } else if (idepice == 2) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*RG) + //-------------------------------------------------------------- + + if (za[jl + klon*(-1 + jk + klev*ibl)] < (*yrecldp).rcldtopcf && + za[jl + klon*(jk + klev*ibl)] >= (*yrecldp).rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jl + klon*(jk + klev*ibl)] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))*rv / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq - (double) 0.639); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + // Particle size distribution + ztcg = (double) 1.0; + zfacx1i = (double) 1.0; + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice* + ztp1[jl + klon*(jk + klev*ibl)] + pap[jl + klon*(jk + klev*ibl)]* + (*yrecldp).rcl_apb3*(pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 3.)); + zcorrfac = pow(((double) 1.0 / zrho), (double) 0.5); + zcorrfac2 = (pow((ztp1[jl + klon*(jk + klev*ibl)] / (double) 273.0), (double)1.5))* + ((double) 393.0 / (ztp1[jl + klon*(jk + klev*ibl)] + (double) 120.0)); + + zpr02 = zrho*zice0*(*yrecldp).rcl_const1i / (ztcg*zfacx1i); + + zterm1 = (zvpliq - zvpice)*(pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 2.0))* + zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2i*zfacx1i / (zrho*zaplusb*zvpice); + zterm2 = (double) 0.65*(*yrecldp).rcl_const6i*(pow(zpr02, (*yrecldp).rcl_const4i)) + + (*yrecldp).rcl_const3i*(pow(zcorrfac, (double) 0.5))*(pow(zrho, (double) 0.5))* + (pow(zpr02, (*yrecldp).rcl_const5i)) / (pow(zcorrfac2, (double) 0.5)); + + zdepos = fmax(za[jl + klon*(jk + klev*ibl)]*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)* + ((*yrecldp).rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + } + + } + // on IDEPICE + + //###################################################################### + // 4 *** PRECIPITATION PROCESSES *** + //###################################################################### + + //---------------------------------- + // revise in-cloud consensate amount + //---------------------------------- + ztmpa = (double) 1.0 / fmax(za[jl + klon*(jk + klev*ibl)], zepsec); + zliqcld = zqxfg[0]*ztmpa; + zicecld = zqxfg[1]*ztmpa; + zlicld = zliqcld + zicecld; + + //---------------------------------------------------------------------- + // 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + // now that rain, snow, graupel species are prognostic + // the precipitation flux can be defined directly level by level + // There is no vertical memory required from the flux variable + //---------------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm] || jm + 1 == 2) { + //------------------------ + // source from layer above + //------------------------ + if (jk + 1 > (*yrecldp).ncldtop) { + zfallsrce[jm] = zpfplsx[jl + klon*(jk + (klev + 1)*(jm + 5*ibl))]*zdtgdp; + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zfallsrce[jm]; + zqxfg[jm] = zqxfg[jm] + zfallsrce[jm]; + // use first guess precip----------V + zqpretot = zqpretot + zqxfg[jm]; + } + //------------------------------------------------- + // sink to next layer, constant fall speed + //------------------------------------------------- + // if aerosol effect then override + // note that for T>233K this is the same as above. + if ((*yrecldp).laericesed && jm + 1 == 2) { + zre_ice = pre_ice[jl + klon*(jk + klev*ibl)]; + // The exponent value is from + // Morrison et al. JAS 2005 Appendix + zvqx[1] = (double) 0.002*(pow(zre_ice, (double) 1.0)); + } + zfall = zvqx[jm]*zrho; + //------------------------------------------------- + // modified by Heymsfield and Iaquinta JAS 2000 + //------------------------------------------------- + // ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + // &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm] = zdtgdp*zfall; + // Cloud budget diagnostic stored at end as implicit + // jl + } + // LLFALL + } + // jm + + //--------------------------------------------------------------- + // Precip cover overlap using MAX-RAN Overlap + // Since precipitation is now prognostic we must + // 1) apply an arbitrary minimum coverage (0.3) if precip>0 + // 2) abandon the 2-flux clr/cld treatment + // 3) Thus, since we have no memory of the clear sky precip + // fraction, we mimic the previous method by reducing + // ZCOVPTOT(JL), which has the memory, proportionally with + // the precip evaporation rate, taking cloud fraction + // into account + // #3 above leads to much smoother vertical profiles of + // precipitation fraction than the Klein-Jakob scheme which + // monotonically increases precip fraction and then resets + // it to zero in a step function once clear-sky precip reaches + // zero. + //--------------------------------------------------------------- + if (zqpretot > zepsec) { + zcovptot = (double) 1.0 - (((double) 1.0 - zcovptot)*((double) 1.0 - + fmax(za[jl + klon*(jk + klev*ibl)], za[jl + klon*(-1 + jk + klev*ibl)])) / + ((double) 1.0 - fmin(za[jl + klon*(-1 + jk + klev*ibl)], (double) 1.0 - (double) 1.E-06))); + zcovptot = fmax(zcovptot, (*yrecldp).rcovpmin); + zcovpclr = fmax((double) 0.0, zcovptot - za[jl + klon*(jk + klev*ibl)]); // clear sky proportion + zraincld = zqxfg[2] / zcovptot; + zsnowcld = zqxfg[3] / zcovptot; + zcovpmax = fmax(zcovptot, zcovpmax); + } else { + zraincld = (double) 0.0; + zsnowcld = (double) 0.0; + zcovptot = (double) 0.0; // no flux - reset cover + zcovpclr = (double) 0.0; // reset clear sky proportion + zcovpmax = (double) 0.0; // reset max cover for ZZRH calc + } + + //---------------------------------------------------------------------- + // 4.3a AUTOCONVERSION TO SNOW + //---------------------------------------------------------------------- + + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt) { + //----------------------------------------------------- + // Snow Autoconversion rate follow Lin et al. 1983 + //----------------------------------------------------- + if (zicecld > zepsec) { + + zzco = ptsphy*(*yrecldp).rsnowlin1*exp((*yrecldp).rsnowlin2* + (ztp1[jl + klon*(jk + klev*ibl)] - rtt)); + + if ((*yrecldp).laericeauto) { + zlcrit = picrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=0.027 + zzco = zzco*(pow(((*yrecldp).rnice / pnice[jl + klon*(jk + klev*ibl)]), (double) 0.333)); + } else { + zlcrit = (*yrecldp).rlcritsnow; + } + + zsnowaut = zzco*((double) 1.0 - exp(-(pow((zicecld / zlcrit), 2)))); + zsolqb[3 + 5*(1)] = zsolqb[3 + 5*(1)] + zsnowaut; + + } + } + + //---------------------------------------------------------------------- + // 4.3b AUTOCONVERSION WARM CLOUDS + // Collection and accretion will require separate treatment + // but for now we keep this simple treatment + //---------------------------------------------------------------------- + + if (zliqcld > zepsec) { + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Sundqvist (1989) + //- + //-------------------------------------------------------- + if (iwarmrain == 1) { + + zzco = (*yrecldp).rkconv*ptsphy; + + if ((*yrecldp).laerliqautolsp) { + zlcrit = plcrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), (double) 0.333)); + } else { + // Modify autoconversion threshold dependent on: + // land (polluted, high CCN, smaller droplets, higher threshold) + // sea (clean, low CCN, larger droplets, lower threshold) + if (plsm[jl + klon*ibl] > (double) 0.5) { + zlcrit = (*yrecldp).rclcrit_land; // land + } else { + zlcrit = (*yrecldp).rclcrit_sea; // ocean + } + } + + //------------------------------------------------------------------ + // Parameters for cloud collection by rain and snow. + // Note that with new prognostic variable it is now possible + // to REPLACE this with an explicit collection parametrization + //------------------------------------------------------------------ + zprecip = (zpfplsx[jl + klon*(jk + (klev + 1)*(3 + 5*ibl))] + + zpfplsx[jl + klon*(jk + (klev + 1)*(2 + 5*ibl))]) / fmax(zepsec, zcovptot); + zcfpr = (double) 1.0 + (*yrecldp).rprc1*sqrt(fmax(zprecip, (double) 0.0)); + // ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + // &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if ((*yrecldp).laerliqcoll) { + // 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), (double) 0.333)); + } + + zzco = zzco*zcfpr; + zlcrit = zlcrit / fmax(zcfpr, zepsec); + + if (zliqcld / zlcrit < (double) 20.0) { + // Security for exp for some compilers + zrainaut = zzco*((double) 1.0 - exp(-(pow((zliqcld / zlcrit), 2)))); + } else { + zrainaut = zzco; + } + + // rain freezes instantly + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt) { + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zrainaut; + } else { + zsolqb[2 + 5*(0)] = zsolqb[2 + 5*(0)] + zrainaut; + } + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Khairoutdinov and Kogan (2000) + //- + //-------------------------------------------------------- + } else if (iwarmrain == 2) { + + if (plsm[jl + klon*ibl] > (double) 0.5) { + // land + zconst = (*yrecldp).rcl_kk_cloud_num_land; + zlcrit = (*yrecldp).rclcrit_land; + } else { + // ocean + zconst = (*yrecldp).rcl_kk_cloud_num_sea; + zlcrit = (*yrecldp).rclcrit_sea; + } + + if (zliqcld > zlcrit) { + + zrainaut = (double) 1.5*za[jl + klon*(jk + klev*ibl)]*ptsphy* + (*yrecldp).rcl_kkaau*(pow(zliqcld, (*yrecldp).rcl_kkbauq))* + (pow(zconst, (*yrecldp).rcl_kkbaun)); + + zrainaut = fmin(zrainaut, zqxfg[0]); + if (zrainaut < zepsec) { + zrainaut = (double) 0.0; + } + + zrainacc = (double) 2.0*za[jl + klon*(jk + klev*ibl)]*ptsphy*(*yrecldp).rcl_kkaac* + (pow((zliqcld*zraincld), (*yrecldp).rcl_kkbac)); + + zrainacc = fmin(zrainacc, zqxfg[0]); + if (zrainacc < zepsec) { + zrainacc = (double) 0.0; + } + + } else { + zrainaut = (double) 0.0; + zrainacc = (double) 0.0; + } + + // If temperature < 0, then autoconversion produces snow rather than rain + // Explicit + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt) { + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainaut; + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainaut; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainacc; + } else { + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainaut; + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainaut; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainacc; + } + + } + // on IWARMRAIN + + } + // on ZLIQCLD > ZEPSEC + + + //---------------------------------------------------------------------- + // RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + // only active if T<0degC and supercooled liquid water is present + // AND if not Sundquist autoconversion (as this includes riming) + //---------------------------------------------------------------------- + if (iwarmrain > 1) { + + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt && zliqcld > zepsec) { + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), (double) 0.4); + + //------------------------------------------------------------------ + // Riming of snow by cloud water - implicit in lwc + //------------------------------------------------------------------ + if (zsnowcld > zepsec && zcovptot > (double) 0.01) { + + // Calculate riming term + // Factor of liq water taken out because implicit + zsnowrime = (double) 0.3*zcovptot*ptsphy*(*yrecldp).rcl_const7s* + zfallcorr*(pow((zrho*zsnowcld*(*yrecldp).rcl_const1s), (*yrecldp).rcl_const8s)); + + // Limit snow riming term + zsnowrime = fmin(zsnowrime, (double) 1.0); + + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zsnowrime; + + } + + //------------------------------------------------------------------ + // Riming of ice by cloud water - implicit in lwc + // NOT YET ACTIVE + //------------------------------------------------------------------ + // IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + // + // ! Calculate riming term + // ! Factor of liq water taken out because implicit + // ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + // & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + // + // ! Limit ice riming term + // ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + // + // ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + // + // ENDIF + } + + } + // on IWARMRAIN > 1 + + + //---------------------------------------------------------------------- + // 4.4a MELTING OF SNOW and ICE + // with new implicit solver this also has to treat snow or ice + // precipitating from the level above... i.e. local ice AND flux. + // in situ ice and snow: could arise from LS advection or warming + // falling ice and snow: arrives by precipitation process + //---------------------------------------------------------------------- + + zicetot = zqxfg[1] + zqxfg[3]; + zmeltmax = (double) 0.0; + + // If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if (zicetot > zepsec && ztp1[jl + klon*(jk + klev*ibl)] > rtt) { + + // Calculate subsaturation + zsubsat = fmax(zqsice[jl + klon*(jk + klev*ibl)] - + zqx[jl + klon*(jk + klev*(4 + 5*ibl))], (double) 0.0); + + // Calculate difference between dry-bulb (ZTP1) and the temperature + // at which the wet-bulb=0degC (RTT-ZSUBSAT*....) using an approx. + // Melting only occurs if the wet-bulb temperature >0 + // i.e. warming of ice particle due to melting > cooling + // due to evaporation. + ztdmtw0 = ztp1[jl + klon*(jk + klev*ibl)] - rtt - zsubsat*(ztw1 + ztw2*(pap[jl + klon*(jk + klev*ibl)] - ztw3) - + ztw4*(ztp1[jl + klon*(jk + klev*ibl)] - ztw5)); + // Not implicit yet... + // Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*ztdmtw0) / (*yrecldp).rtaumel); + zmeltmax = fmax(ztdmtw0*zcons1*zrldcp, (double) 0.0); + } + + // Loop over frozen hydrometeors (ice, snow) + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (iphase[jm] == 2) { + jn = imelt[jm]; + if (zmeltmax > zepsec && zicetot > zepsec) { + // Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm] / zicetot; + zmelt = fmin(zqxfg[jm], zalfa*zmeltmax); + // needed in first guess + // This implies that zqpretot has to be recalculated below + // since is not conserved here if ice falls and liquid doesn't + zqxfg[jm] = zqxfg[jm] - zmelt; + zqxfg[-1 + jn] = zqxfg[-1 + jn] + zmelt; + zsolqa[-1 + jn + 5*jm] = zsolqa[-1 + jn + 5*jm] + zmelt; + zsolqa[jm + 5*(-1 + jn)] = zsolqa[jm + 5*(-1 + jn)] - zmelt; + } + } + } + + //---------------------------------------------------------------------- + // 4.4b FREEZING of RAIN + //---------------------------------------------------------------------- + + // If rain present + if (zqx[jl + klon*(jk + klev*(2 + 5*ibl))] > zepsec) { + + if (ztp1[jl + klon*(jk + klev*ibl)] <= rtt && ztp1[jl + klon*(-1 + jk + klev*ibl)] > rtt) { + // Base of melting layer/top of refreezing layer so + // store rain/snow fraction for precip type diagnosis + // If mostly rain, then supercooled rain slow to freeze + // otherwise faster to freeze (snow or ice pellets) + zqpretot = fmax(zqx[jl + klon*(jk + klev*(3 + 5*ibl))] + + zqx[jl + klon*(jk + klev*(2 + 5*ibl))], zepsec); + prainfrac_toprfz[jl + klon*ibl] = zqx[jl + klon*(jk + klev*(2 + 5*ibl))] / zqpretot; + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + llrainliq = true; + } else { + llrainliq = false; + } + } + + // If temperature less than zero + if (ztp1[jl + klon*(jk + klev*ibl)] < rtt) { + + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + + // Majority of raindrops completely melted + // Refreezing is by slow heterogeneous freezing + + // Slope of rain particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zqx[jl + klon*(jk + klev*(2 +5*ibl))])), + (*yrecldp).rcl_fac2); + + // Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = (*yrecldp).rcl_fzrab*(ztp1[jl + klon*(jk + klev*ibl)] - rtt); + zfrz = ptsphy*((*yrecldp).rcl_const5r / zrho)*(exp(ztemp) - + (double) 1.)*(pow(zlambda, (*yrecldp).rcl_const6r)); + zfrzmax = fmax(zfrz, (double) 0.0); + + } else { + + // Majority of raindrops only partially melted + // Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*(rtt - + ztp1[jl + klon*(jk + klev*ibl)])) / (*yrecldp).rtaumel); + zfrzmax = fmax((rtt - ztp1[jl + klon*(jk + klev*ibl)])*zcons1*zrldcp, (double) 0.0); + + } + + if (zfrzmax > zepsec) { + zfrz = fmin(zqx[jl + klon*(jk + klev*(2 + 5*ibl))], zfrzmax); + zsolqa[3 + 5*(2)] = zsolqa[3 + 5*(2)] + zfrz; + zsolqa[2 + 5*(3)] = zsolqa[2 + 5*(3)] - zfrz; + } + } + + } + + + //---------------------------------------------------------------------- + // 4.4c FREEZING of LIQUID + //---------------------------------------------------------------------- + // not implicit yet... + zfrzmax = fmax(((*yrecldp).rthomo - ztp1[jl + klon*(jk + klev*ibl)])*zrldcp, (double) 0.0); + + jm = 1; + jn = imelt[-1 + jm]; + if (zfrzmax > zepsec && zqxfg[-1 + jm] > zepsec) { + zfrz = fmin(zqxfg[-1 + jm], zfrzmax); + zsolqa[-1 + jn + 5*(-1 + jm)] = zsolqa[-1 + jn + 5*(-1 + jm)] + zfrz; + zsolqa[-1 + jm + 5*(-1 + jn)] = zsolqa[-1 + jm + 5*(-1 + jn)] - zfrz; + } + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF RAIN/SNOW + //---------------------------------------------------------------------- + + //---------------------------------------- + // Rain evaporation scheme from Sundquist + //---------------------------------------- + if (ievaprain == 1) { + + // Rain + + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsliq[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsliq[jl + klon*(jk + klev*ibl)])); + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + // note: zpreclr is a rain flux + zpreclr = zqxfg[2]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]) / + (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(double) 0.5*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsliq; + zdpr = zcovpclr*zbeta*(zqsliq[jl + klon*(jk + klev*ibl)] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Evaporate rain + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jl + klon*(jk + klev*ibl)])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + + //--------------------------------------------------------- + // Rain evaporation scheme based on Abel and Boutle (2013) + //--------------------------------------------------------- + } else if (ievaprain == 2) { + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for rain evaporation + // to avoid cloud formation and saturation of the grid box + //----------------------------------------------------------------------- + // Limit RH for rain evaporation dependent on precipitation fraction + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + // Critical relative humidity + //ZRHC=RAMID + //ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + //IF(ZSIGK > 0.8_JPRB) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + //ENDIF + //ZZRH = MIN(ZRHC,ZZRH) + + // Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = fmin((double) 0.8, zzrh); + + zqe = fmax((double) 0.0, fmin(zqx[jl + klon*(jk + klev*(4 + 5*ibl))], + zqsliq[jl + klon*(jk + klev*ibl)])); + + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && + zqe < zzrh*zqsliq[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + + //------------------------------------------- + // Abel and Boutle (2012) evaporation + //------------------------------------------- + // Calculate local precipitation (kg/kg) + zpreclr = zqxfg[2] / zcovptot; + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), 0.4); + + // Saturation vapour pressure with respect to liquid phase + zesatliq = rv / rd*((double)(r2es*exp((r3les*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/ + (ztp1[jl + klon*(jk + klev*ibl)] - r4les)))); + + // Slope of particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zpreclr)), (*yrecldp).rcl_fac2); // ZPRECLR=kg/kg + + zevap_denom = (*yrecldp).rcl_cdenom1*zesatliq - (*yrecldp).rcl_cdenom2* + ztp1[jl + klon*(jk + klev*ibl)]*zesatliq + (*yrecldp).rcl_cdenom3* + (pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 3.))*pap[jl + klon*(jk + klev*ibl)]; + + // Temperature dependent conductivity + zcorr2 = (pow((ztp1[jl + klon*(jk + klev*ibl)] / (double) 273.), (double) 1.5))*(double) 393. / + (ztp1[jl + klon*(jk + klev*ibl)] + (double) 120.); + zka = (*yrecldp).rcl_ka273*zcorr2; + + zsubsat = fmax(zzrh*zqsliq[jl + klon*(jk + klev*ibl)] - zqe, (double) 0.0); + + zbeta = ((double) 0.5 / zqsliq[jl + klon*(jk + klev*ibl)])* + (pow(ztp1[jl + klon*(jk + klev*ibl)], (double) 2.))*zesatliq*(*yrecldp).rcl_const1r*(zcorr2 / + zevap_denom)*((double) 0.78 / (pow(zlambda, (*yrecldp).rcl_const4r)) + + (*yrecldp).rcl_const2r*(pow((zrho*zfallcorr), (double) 0.5)) / ((pow(zcorr2, + (double) 0.5))*(pow(zlambda, (*yrecldp).rcl_const3r)))); + + zdenom = (double) 1.0 + zbeta*ptsphy; //*ZCORQSLIQ(JL) + zdpevap = zcovpclr*zbeta*ptsphy*zsubsat / zdenom; + + //--------------------------------------------------------- + // Add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Limit rain evaporation + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jl + klon*(jk + klev*ibl)])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + } + // on IEVAPRAIN + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF SNOW + //---------------------------------------------------------------------- + // Snow + if (ievapsnow == 1) { + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice[jl + klon*(jk + klev*ibl)])); + llo1 = zcovpclr > zepsec && zqxfg[3] > zepsec && + zqe < zzrh*zqsice[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + // note: zpreclr is a rain flux a + zpreclr = zqxfg[3]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]) / + (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsice; + zdpr = zcovpclr*zbeta*(zqsice[jl + klon*(jk + klev*ibl)] - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce snow to zero and model + // produces small amounts of snowfall everywhere. + //--------------------------------------------------------- + + // Evaporate snow + zevap = fmin(zdpevap, zqxfg[3]); + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jl + klon*(jk + klev*ibl)])*zevap / zqxfg[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + //--------------------------------------------------------- + } else if (ievapsnow == 2) { + + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for snow evaporation + //----------------------------------------------------------------------- + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax)*zcovpmax / + fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[jl + klon*(jk + klev*(4 + 5*ibl))] - za[jl + klon*(jk + klev*ibl)]* + zqsice[jl + klon*(jk + klev*ibl)]) / fmax(zepsec, (double) 1.0 - za[jl + klon*(jk + klev*ibl)]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice[jl + klon*(jk + klev*ibl)])); + llo1 = zcovpclr > zepsec && zqx[jl + klon*(jk + klev*(3 + 5*ibl))] > zepsec + && zqe < zzrh*zqsice[jl + klon*(jk + klev*ibl)]; + + if (llo1) { + + // Calculate local precipitation (kg/kg) + zpreclr = zqx[jl + klon*(jk + klev*(3 + 5*ibl))] / zcovptot; + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jl + klon*(jk + klev*ibl)] - rtt))/(ztp1[jl + klon*(jk + klev*ibl)] - r4ies))))*rv / rd; + + // Particle size distribution + // ZTCG increases Ni with colder temperatures - essentially a + // Fletcher or Meyers scheme? + ztcg = (double) 1.0; //v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + // ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = (double) 1.0; //v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice* + ztp1[jl + klon*(jk + klev*ibl)] + pap[jl + klon*(jk + klev*ibl)]*(*yrecldp).rcl_apb3* + (pow(ztp1[jl + klon*(jk + klev*ibl)], 3)); + zcorrfac = pow((1.0 / zrho), 0.5); + zcorrfac2 = (pow((ztp1[jl + klon*(jk + klev*ibl)] / 273.0), 1.5))*(393.0 / (ztp1[jl + klon*(jk + klev*ibl)] + 120.0)); + + zpr02 = zrho*zpreclr*(*yrecldp).rcl_const1s / (ztcg*zfacx1s); + + zterm1 = (zqsice[jl + klon*(jk + klev*ibl)] - zqe)*(pow(ztp1[jl + klon*(jk + klev*ibl)], 2))* + zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2s*zfacx1s / (zrho*zaplusb*zqsice[jl + klon*(jk + klev*ibl)]); + zterm2 = 0.65*(*yrecldp).rcl_const6s*(pow(zpr02, (*yrecldp).rcl_const4s)) + + (*yrecldp).rcl_const3s*(pow(zcorrfac, 0.5))*(pow(zrho, 0.5))*(pow(zpr02, + (*yrecldp).rcl_const5s)) / (pow(zcorrfac2, 0.5)); + + zdpevap = fmax(zcovpclr*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit evaporation to snow amount + //-------------------------------------------------------------------- + zevap = fmin(zdpevap, zevaplimice); + zevap = fmin(zevap, zqx[jl + klon*(jk + klev*(3 + 5*ibl))]); + + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, (zcovptot - + za[jl + klon*(jk + klev*ibl)])*zevap / zqx[jl + klon*(jk + klev*(3 + 5*ibl))])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + + } + // on IEVAPSNOW + + //-------------------------------------- + // Evaporate small precipitation amounts + //-------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm]) { + if (zqxfg[jm] < (*yrecldp).rlmin) { + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zqxfg[jm]; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zqxfg[jm]; + } + } + } + + //###################################################################### + // 5.0 *** SOLVERS FOR A AND L *** + // now use an implicit solution rather than exact solution + // solver is forward in time, upstream difference for advection + //###################################################################### + + //--------------------------- + // 5.1 solver for cloud cover + //--------------------------- + zanew = (za[jl + klon*(jk + klev*ibl)] + zsolac) / ((double) 1.0 + zsolab); + zanew = fmin(zanew, (double) 1.0); + if (zanew < (*yrecldp).ramin) { + zanew = (double) 0.0; + } + zda = zanew - zaorig[jl + klon*(jk + klev*ibl)]; + //--------------------------------- + // variables needed for next level + //--------------------------------- + zanewm1 = zanew; + + //-------------------------------- + // 5.2 solver for the microphysics + //-------------------------------- + + //-------------------------------------------------------------- + // Truncate explicit sinks to avoid negatives + // Note: Species are treated in the order in which they run out + // since the clipping will alter the balance for the other vars + //-------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + llindex3[jn + 5*jm] = false; + } + zsinksum[jm] = (double) 0.0; + } + + //---------------------------- + // collect sink terms and mark + //---------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsinksum[jm] = zsinksum[jm] - zsolqa[jm + 5*jn]; // +ve total is bad + } + } + + //--------------------------------------- + // calculate overshoot and scaling factor + //--------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zmax = fmax(zqx[jl + klon*(jk + klev*(jm + 5*ibl))], zepsec); + zrat = fmax(zsinksum[jm], zmax); + zratio[jm] = zmax / zrat; + } + + //-------------------------------------------- + // scale the sink terms, in the correct order, + // recalculating the scale factor each time + //-------------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zsinksum[jm] = (double) 0.0; + } + + //---------------- + // recalculate sum + //---------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + psum_solqa = 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + psum_solqa = psum_solqa + zsolqa[jm + 5*jn]; + } + // ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm] = zsinksum[jm] - psum_solqa; + //--------------------------- + // recalculate scaling factor + //--------------------------- + zmm = fmax(zqx[jl + klon*(jk + klev*(jm + 5*ibl))], zepsec); + zrr = fmax(zsinksum[jm], zmm); + zratio[jm] = zmm / zrr; + //------ + // scale + //------ + zzratio = zratio[jm]; + //DIR$ IVDEP + //DIR$ PREFERVECTOR + for (jn = 0; jn <= 5 + -1; jn += 1) { + if (zsolqa[jm + 5*jn] < (double) 0.0) { + zsolqa[jm + 5*jn] = zsolqa[jm + 5*jn]*zzratio; + zsolqa[jn + 5*jm] = zsolqa[jn + 5*jm]*zzratio; + } + } + } + + //-------------------------------------------------------------- + // 5.2.2 Solver + //------------------------ + + //------------------------ + // set the LHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + //---------------------------------------------- + // diagonals: microphysical sink terms+transport + //---------------------------------------------- + if (jn + 1 == jm + 1) { + zqlhs[jn + 5*jm] = (double) 1.0 + zfallsink[jm]; + for (jo = 0; jo <= 5 + -1; jo += 1) { + zqlhs[jn + 5*jm] = zqlhs[jn + 5*jm] + zsolqb[jo + 5*jn]; + } + //------------------------------------------ + // non-diagonals: microphysical source terms + //------------------------------------------ + } else { + zqlhs[jn + 5*jm] = -zsolqb[jn + 5*jm]; // here is the delta T - missing from doc. + } + } + } + + //------------------------ + // set the RHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + //--------------------------------- + // sum the explicit source and sink + //--------------------------------- + zexplicit = (double) 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + zexplicit = zexplicit + zsolqa[jm + 5*jn]; // sum over middle index + } + zqxn[jm] = zqx[jl + klon*(jk + klev*(jm + 5*ibl))] + zexplicit; + } + + //----------------------------------- + // *** solve by LU decomposition: *** + //----------------------------------- + + // Note: This fast way of solving NCLVxNCLV system + // assumes a good behaviour (i.e. non-zero diagonal + // terms with comparable orders) of the matrix stored + // in ZQLHS. For the moment this is the case but + // be aware to preserve it when doing eventual + // modifications. + + // Non pivoting recursive factorization + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + // number of steps + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + // row index + zqlhs[jm + 5*jn] = zqlhs[jm + 5*jn] / zqlhs[jn + 5*jn]; + for (ik = jn + 1; ik <= 5 + -1; ik += 1) { + // column index + zqlhs[jm + 5*ik] = zqlhs[jm + 5*ik] - zqlhs[jm + 5*jn]*zqlhs[jn + 5*ik]; + } + } + } + + // Backsubstitution + // step 1 + for (jn = 1; jn <= 5 + -1; jn += 1) { + for (jm = 0; jm <= jn + 1 - 1 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + } + // step 2 + zqxn[4] = zqxn[4] / zqlhs[4 + 5*(4)]; + for (jn = -2 + 5; jn >= 1 + -1; jn += -1) { + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + zqxn[jn] = zqxn[jn] / zqlhs[jn + 5*jn]; + } + + // Ensure no small values (including negatives) remain in cloud variables nor + // precipitation rates. + // Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + if (zqxn[jn] < zepsec) { + zqxn[4] = zqxn[4] + zqxn[jn]; + zqxn[jn] = (double) 0.0; + } + } + + //-------------------------------- + // variables needed for next level + //-------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxnm1[jm] = zqxn[jm]; + zqxn2d[jl + klon*(jk + klev*(jm + 5*ibl))] = zqxn[jm]; + } + + //------------------------------------------------------------------------ + // 5.3 Precipitation/sedimentation fluxes to next level + // diagnostic precipitation fluxes + // It is this scaled flux that must be used for source to next layer + //------------------------------------------------------------------------ + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[jl + klon*(1 + jk + (klev + 1)*(jm + 5*ibl))] = + zfallsink[jm]*zqxn[jm]*zrdtgdp; + } + + // Ensure precipitation fraction is zero if no precipitation + zqpretot = zpfplsx[jl + klon*(1 + jk + (klev + 1)*(3 + 5*ibl))] + + zpfplsx[jl + klon*(1 + jk + (klev + 1)*(2 + 5*ibl))]; + if (zqpretot < zepsec) { + zcovptot = (double) 0.0; + } + + //###################################################################### + // 6 *** UPDATE TENDANCIES *** + //###################################################################### + + //-------------------------------- + // 6.1 Temperature and CLV budgets + //-------------------------------- + + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + + // calculate fluxes in and out of box for conservation of TL + zfluxq[jm] = zpsupsatsrce[jm] + zconvsrce[jm] + zfallsrce[jm] - + (zfallsink[jm] + zconvsink[jm])*zqxn[jm]; + + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + klon*(jk + klev*ibl)] + + ralvdcp*(zqxn[jm] - zqx[jl + klon*(jk + klev*(jm + 5*ibl))] - zfluxq[jm])*zqtmst; + } + + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + klon*(jk + klev*ibl)] + + ralsdcp*(zqxn[jm] - zqx[jl + klon*(jk + klev*(jm + 5*ibl))] - zfluxq[jm])*zqtmst; + } + + //---------------------------------------------------------------------- + // New prognostic tendencies - ice,liquid rain,snow + // Note: CLV arrays use PCLV in calculation of tendency while humidity + // uses ZQX. This is due to clipping at start of cloudsc which + // include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + //---------------------------------------------------------------------- + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] + + (zqxn[jm] - zqx0[jl + klon*(jk + klev*(jm + 5*ibl))])*zqtmst; + + } + + //---------------------- + // 6.2 Humidity budget + //---------------------- + tendency_loc_q[jl + klon*(jk + klev*ibl)] = tendency_loc_q[jl + klon*(jk + klev*ibl)] + + (zqxn[4] - zqx[jl + klon*(jk + klev*(4 + 5*ibl))])*zqtmst; + + //------------------- + // 6.3 cloud cover + //----------------------- + tendency_loc_a[jl + klon*(jk + klev*ibl)] = tendency_loc_a[jl + klon*(jk + klev*ibl)] + zda*zqtmst; + + //-------------------------------------------------- + // Copy precipitation fraction into output variable + //------------------------------------------------- + pcovptot[jl + klon*(jk + klev*ibl)] = zcovptot; + + } + // on vertical level JK + //---------------------------------------------------------------------- + // END OF VERTICAL LOOP + //---------------------------------------------------------------------- + + //###################################################################### + // 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + //###################################################################### + + //-------------------------------------------------------------------- + // Copy general precip arrays back into PFP arrays for GRIB archiving + // Add rain and liquid fluxes, ice and snow fluxes + //-------------------------------------------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfplsl[jl + klon*(jk + (klev + 1)*ibl)] = zpfplsx[jl + klon*(jk + (klev + 1)*(2 + 5*ibl))] + + zpfplsx[jl + klon*(jk + (klev + 1)*(0 + 5*ibl))]; + pfplsn[jl + klon*(jk + (klev + 1)*ibl)] = zpfplsx[jl + klon*(jk + (klev + 1)*(3 + 5*ibl))] + + zpfplsx[jl + klon*(jk + (klev + 1)*(1 + 5*ibl))]; + } + + //-------- + // Fluxes: + //-------- + pfsqlf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqif[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqrf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqsf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqlng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqnng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqrng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //rain + pfcqsng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //snow + // fluxes due to turbulence + pfsqltur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqitur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + + for (jk = 0; jk <= klev + -1; jk += 1) { + + zgdph_r = -zrg_r*(paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - + paph[jl + klon*(jk + (klev + 1)*ibl)])*zqtmst; + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqltur[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqitur[jl + klon*(jk + (klev + 1)*ibl)]; + + zalfaw = zfoealfa[jl + klon*(jk + (klev + 1)*ibl)]; + + // Liquid , LS scheme minus detrainment + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(0 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(0 + 5*ibl))] + + pvfl[jl + klon*(jk + klev*ibl)]*ptsphy - zalfaw*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // liquid, negative numbers + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(0 + 5*ibl))]*zgdph_r; + + // liquid, vertical diffusion + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] + + pvfl[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // Rain, LS scheme + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(2 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(2 + 5*ibl))])*zgdph_r; + // rain, negative numbers + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(2 + 5*ibl))]*zgdph_r; + + // Ice , LS scheme minus detrainment + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(1 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(1 + 5*ibl))] + + pvfi[jl + klon*(jk + klev*ibl)]*ptsphy - ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // ice, negative numbers + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(1 + 5*ibl))]*zgdph_r; + + // ice, vertical diffusion + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] + + pvfi[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // snow, LS scheme + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] + + (zqxn2d[jl + klon*(jk + klev*(3 + 5*ibl))] - zqx0[jl + klon*(jk + klev*(3 + 5*ibl))])*zgdph_r; + // snow, negative numbers + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] + + zlneg[jl + klon*(jk + klev*(3 + 5*ibl))]*zgdph_r; + } + + //----------------------------------- + // enthalpy flux due to precipitation + //----------------------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + pfhpsl[jl + klon*(jk + (klev + 1)*ibl)] = -rlvtt*pfplsl[jl + klon*(jk + (klev + 1)*ibl)]; + pfhpsn[jl + klon*(jk + (klev + 1)*ibl)] = -rlstt*pfplsn[jl + klon*(jk + (klev + 1)*ibl)]; + } +} + diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel b/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel new file mode 100644 index 00000000..4af9ebd3 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel @@ -0,0 +1,2631 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#include +#include +#include + +void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, + const double * __restrict__ pt, + const double * __restrict__ pq, const double * __restrict__ tendency_tmp_t, + const double * __restrict__ tendency_tmp_q, const double * __restrict__ tendency_tmp_a, + const double * __restrict__ tendency_tmp_cld, double * __restrict__ tendency_loc_t, + double * __restrict__ tendency_loc_q, double * __restrict__ tendency_loc_a, + double * __restrict__ tendency_loc_cld, const double * __restrict__ pvfa, + const double * __restrict__ pvfl, const double * __restrict__ pvfi, const double * __restrict__ pdyna, + const double * __restrict__ pdynl, const double * __restrict__ pdyni, const double * __restrict__ phrsw, + double * __restrict__ phrlw, const double * __restrict__ pvervel, const double * __restrict__ pap, + const double * __restrict__ paph, const double * __restrict__ plsm, + const int * ktype, const double * __restrict__ plu, double * __restrict__ plude, + const double * __restrict__ psnde, const double * __restrict__ pmfu, const double * __restrict__ pmfd, + const double * __restrict__ pa, const double * __restrict__ pclv, const double * __restrict__ psupsat, + const double * __restrict__ plcrit_aer, const double * __restrict__ picrit_aer, + const double * __restrict__ pre_ice, const double * __restrict__ pccn, const double * __restrict__ pnice, + double * __restrict__ pcovptot, double * __restrict__ prainfrac_toprfz, + double * __restrict__ pfsqlf, double * __restrict__ pfsqif, double * __restrict__ pfcqnng, + double * __restrict__ pfcqlng, double * __restrict__ pfsqrf, double * __restrict__ pfsqsf, + double * __restrict__ pfcqrng, double * __restrict__ pfcqsng, + double * __restrict__ pfsqltur, double * __restrict__ pfsqitur, + double * __restrict__ pfplsl, double * __restrict__ pfplsn, double * __restrict__ pfhpsl, + double * __restrict__ pfhpsn, struct TECLDP *yrecldp, int ngpblks, + double rg, double rd, double rcpd, double retv, double rlvtt, double rlstt, double rlmlt, double rtt, + double rv, double r2es, double r3les, double r3ies, double r4les, double r4ies, double r5les, + double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, + double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, + double rkoop1, double rkoop2, + cl::sycl:: stream out_stream, cl::sycl::nd_item<1> item_ct) { + + //------------------------------------------------------------------------------- + // Declare input/output arguments + //------------------------------------------------------------------------------- + + // PLCRIT_AER : critical liquid mmr for rain autoconversion process + // PICRIT_AER : critical liquid mmr for snow autoconversion process + // PRE_LIQ : liq Re + // PRE_ICE : ice Re + // PCCN : liquid cloud condensation nuclei + // PNICE : ice number concentration (cf. CCN) + + const int klev = 137; // Number of levels + + double zlcond1, zlcond2, zlevapl, zlevapi, zrainaut, zsnowaut, zliqcld, zicecld; + double zlevap, zleros; + // condensation and evaporation terms + // autoconversion terms + double zfokoop; + double zfoealfa; + double zicenuclei; // number concentration of ice nuclei + + double zlicld; + double zacond; + double zaeros; + double zlfinalsum; + double zdqs; + double ztold; + double zqold; + double zdtgdp; + double zrdtgdp; + double ztrpaus; + double zcovpclr; + double zpreclr; + double zcovptot; + double zcovpmax; + double zqpretot; + double zdpevap; + double zdtforc; + double zdtdiab; + double ztp1[2]; + double zldefr; + double zldifdt; + double zdtgdpf; + double zlcust[5]; + double zacust; + double zmf; + + double zrho; + double ztmp1, ztmp2, ztmp3; + double ztmp4, ztmp5, ztmp6, ztmp7; + double zalfawm; + + // Accumulators of A,B,and C factors for cloud equations + double zsolab; // -ve implicit CC + double zsolac; // linear CC + double zanew; + double zanewm1; + + double zgdp; + + //---for flux calculation + double zda; + double zli; + double za[2]; + double zaorig; + + int llflag; + int llo1; + + int icall, ik, jk, jl, jm, jn, jo, jlen, is; + + double zdp, zpaphd; + + double zalfa; + // & ZALFACU, ZALFALS + double zalfaw; + double zbeta, zbeta1; + double zcfpr; + double zcor; + double zcdmax; + double zmin; + double zlcondlim; + double zdenom; + double zdpmxdt; + double zdpr; + double zdtdp; + double ze; + double zepsec; + double zfac, zfaci, zfacw; + double zgdcp; + double zinew; + double zlcrit; + double zmfdn; + double zprecip; + double zqe; + double zqsat, zqtmst, zrdcp; + double zrhc, zsig, zsigk; + double zwtot; + double zzco, zzdl, zzrh, zzzdt, zqadj; + double zqnew, ztnew; + double zrg_r, zgdph_r, zcons1, zcond, zcons1a; + double zlfinal; + double zmelt; + double zevap; + double zfrz; + double zvpliq, zvpice; + double zadd, zbdd, zcvds, zice0, zdepos; + double zsupsat; + double zfall; + double zre_ice; + double zrldcp; + double zqp1env; + + //---------------------------- + // Arrays for new microphysics + //---------------------------- + int iphase[5]; // marker for water phase of each species + // 0=vapour, 1=liquid, 2=ice + + int imelt[5]; // marks melting linkage for ice categories + // ice->liquid, snow->rain + + int llfall[5]; // marks falling species + // LLFALL=0, cloud cover must > 0 for zqx > 0 + // LLFALL=1, no cloud needed, zqx can evaporate + + int llindex1[5]; // index variable + int llindex3[5*5]; // index variable + double zmax; + double zrat; + int iorder[5]; // array for sorting explicit terms + + double zliqfrac; + double zicefrac; + double zqx[5]; + double zqx0[5]; + double zqxn[5]; // new values for zqx at time+1 + double zqxfg[5]; // first guess values including precip + double zqxnm1[5]; // new values for zqx at time+1 at level above + double zfluxq[5]; // fluxes convergence of species (needed?) + // Keep the following for possible future total water variance scheme? + //REAL(KIND=JPRB) :: ZTL(KLON,KLEV) ! liquid water temperature + //REAL(KIND=JPRB) :: ZABETA(KLON,KLEV) ! cloud fraction + //REAL(KIND=JPRB) :: ZVAR(KLON,KLEV) ! temporary variance + //REAL(KIND=JPRB) :: ZQTMIN(KLON,KLEV) + //REAL(KIND=JPRB) :: ZQTMAX(KLON,KLEV) + + double zlneg[5]; + double zmeltmax; + double zfrzmax; + double zicetot; + + double zqxn2d[5]; + + double zqsmix; + //REAL(KIND=JPRB) :: ZQSBIN(KLON,KLEV) ! binary switched ice/liq saturation + double zqsliq; + double zqsice; + + //REAL(KIND=JPRB) :: ZRHM(KLON,KLEV) ! diagnostic mixed phase RH + //REAL(KIND=JPRB) :: ZRHL(KLON,KLEV) ! RH wrt liq + //REAL(KIND=JPRB) :: ZRHI(KLON,KLEV) ! RH wrt ice + + double zfoeewmt; + double zfoeew; + double zfoeeliqt; + + //REAL(KIND=JPRB) :: ZFOEEICET(KLON,KLEV) + + double zdqsliqdt, zdqsicedt, zdqsmixdt; + double zcorqsliq; + double zcorqsice; + //REAL(KIND=JPRB) :: ZCORQSBIN(KLON) + double zcorqsmix; + double zevaplimliq, zevaplimice, zevaplimmix; + + //------------------------------------------------------- + // SOURCE/SINK array for implicit and explicit terms + //------------------------------------------------------- + // a POSITIVE value entered into the arrays is a... + // Source of this variable + // | + // | Sink of this variable + // | | + // V V + // ZSOLQA(JL,IQa,IQb) = explicit terms + // ZSOLQB(JL,IQa,IQb) = implicit terms + // Thus if ZSOLAB(JL,NCLDQL,IQV)=K where K>0 then this is + // a source of NCLDQL and a sink of IQV + // put 'magic' source terms such as PLUDE from + // detrainment into explicit source/sink array diagnognal + // ZSOLQA(NCLDQL,NCLDQL)= -PLUDE + // i.e. A positive value is a sink!????? weird... + //------------------------------------------------------- + + double zsolqa[5*5]; // explicit sources and sinks + double zsolqb[5*5]; // implicit sources and sinks + // e.g. microphysical pathways between ice variables. + double zqlhs[5*5]; // n x n matrix storing the LHS of implicit solver + double zvqx[5]; // fall speeds of three categories + double zexplicit; + double zratio[5], zsinksum[5]; + + // for sedimentation source/sink terms + double zfallsink[5]; + double zfallsrce[5]; + + // for convection detrainment source and subsidence source/sink terms + double zconvsrce[5]; + double zconvsink[5]; + + // for supersaturation source term from previous timestep + double zpsupsatsrce[5]; + + // Numerical fit to wet bulb temperature + double ztw1 = (double) 1329.31; + double ztw2 = (double) 0.0074615; + double ztw3 = (double) 0.85E5; + double ztw4 = (double) 40.637; + double ztw5 = (double) 275.0; + + double zsubsat; // Subsaturation for snow melting term + double ztdmtw0; // Diff between dry-bulb temperature and + // temperature when wet-bulb = 0degC + + // Variables for deposition term + double ztcg; // Temperature dependent function for ice PSD + double zfacx1i, zfacx1s; // PSD correction factor + double zaplusb, zcorrfac, zcorrfac2, zpr02, zterm1, zterm2; // for ice dep + double zcldtopdist; // Distance from cloud top + double zinfactor; // No. of ice nuclei factor for deposition + + // Autoconversion/accretion/riming/evaporation + int iwarmrain; + int ievaprain; + int ievapsnow; + int idepice; + double zrainacc; + double zraincld; + double zsnowrime; + double zsnowcld; + double zesatliq; + double zfallcorr; + double zlambda; + double zevap_denom; + double zcorr2; + double zka; + double zconst; + double ztemp; + + // Rain freezing + int llrainliq; // True if majority of raindrops are liquid (no ice core) + + //---------------------------- + // End: new microphysics + //---------------------------- + + //---------------------- + // SCM budget statistics + //---------------------- + double zrain; + + double zhook_handle; + double ztmpl, ztmpi, ztmpa; + + double zmm, zrr; + double zrg; + + double zzsum, zzratio; + double zepsilon; + + double zcond1, zqp; + + double psum_solqa; + + int ibl; + int i_llfall_0; + //double zqx[5 * klev]; + //double zqx0[5 * klev]; + double zpfplsx[5 * 2]; + //double zlneg[5 * klev]; + //double zqxn2d[5 * klev]; + + jl = item_ct.get_local_id(0); //threadIdx.x; + ibl = item_ct.get_group(0); // or 2? blockIdx.z; + + int jk_i; + int jk_ip1; + int jk_im1; + + zepsilon = (double) 100.*std::numeric_limits::epsilon(); //DBL_EPSILON; + + // --------------------------------------------------------------------- + // Set version of warm-rain autoconversion/accretion + // IWARMRAIN = 1 ! Sundquist + // IWARMRAIN = 2 ! Khairoutdinov and Kogan (2000) + // --------------------------------------------------------------------- + iwarmrain = 2; + // --------------------------------------------------------------------- + // Set version of rain evaporation + // IEVAPRAIN = 1 ! Sundquist + // IEVAPRAIN = 2 ! Abel and Boutle (2013) + // --------------------------------------------------------------------- + ievaprain = 2; + // --------------------------------------------------------------------- + // Set version of snow evaporation + // IEVAPSNOW = 1 ! Sundquist + // IEVAPSNOW = 2 ! New + // --------------------------------------------------------------------- + ievapsnow = 1; + // --------------------------------------------------------------------- + // Set version of ice deposition + // IDEPICE = 1 ! Rotstayn (2001) + // IDEPICE = 2 ! New + // --------------------------------------------------------------------- + idepice = 1; + + // --------------------- + // Some simple constants + // --------------------- + zqtmst = (double) 1.0 / ptsphy; + zgdcp = rg / rcpd; + zrdcp = rd / rcpd; + zcons1a = rcpd / (rlmlt*rg*(*yrecldp).rtaumel); + zepsec = (double) 1.E-14; + zrg_r = (double) 1.0 / rg; + zrldcp = (double) 1.0 / (ralsdcp - ralvdcp); + + // Note: Defined in module/yoecldp.F90 + // NCLDQL=1 ! liquid cloud water + // NCLDQI=2 ! ice cloud water + // NCLDQR=3 ! rain water + // NCLDQS=4 ! snow + // NCLDQV=5 ! vapour + + // ----------------------------------------------- + // Define species phase, 0=vapour, 1=liquid, 2=ice + // ----------------------------------------------- + iphase[4] = 0; + iphase[0] = 1; + iphase[2] = 1; + iphase[1] = 2; + iphase[3] = 2; + + // --------------------------------------------------- + // Set up melting/freezing index, + // if an ice category melts/freezes, where does it go? + // --------------------------------------------------- + imelt[4] = -99; + imelt[0] = 2; + imelt[2] = 4; + imelt[1] = 3; + imelt[3] = 3; + + // ----------------------------------------------- + // INITIALIZATION OF OUTPUT TENDENCIES + // ----------------------------------------------- + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_a[jl + klon*(jk + klev*ibl)] = (double) 0.0; + } + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + for (jk = 0; jk <= klev + -1; jk += 1) { + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = (double) 0.0; + } + } + + //-- These were uninitialized : meaningful only when we compare error differences + for (jk = 0; jk <= klev + -1; jk += 1) { + pcovptot[jl + klon*(jk + klev*ibl)] = (double) 0.0; + tendency_loc_cld[jl + klon*(jk + klev*(5 - 1 + 5*(ibl)))] = (double) 0.0; + } + + //-------- + // Fluxes: + //-------- + pfsqlf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqif[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqrf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqsf[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqlng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqnng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfcqrng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //rain + pfcqsng[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; //snow + // fluxes due to turbulence + pfsqltur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + pfsqitur[jl + klon*(0 + (klev + 1)*ibl)] = (double) 0.0; + + // ------------------------- + // set up fall speeds in m/s + // ------------------------- + zvqx[4] = (double) 0.0; + zvqx[0] = (double) 0.0; + zvqx[1] = (*yrecldp).rvice; + zvqx[2] = (*yrecldp).rvrain; + zvqx[3] = (*yrecldp).rvsnow; + for (i_llfall_0 = 0; i_llfall_0 <= 5 + -1; i_llfall_0 += 1) { + llfall[i_llfall_0] = false; + } + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (zvqx[jm] > (double) 0.0) { + llfall[jm] = true; + } + // falling species + } + // Set LLFALL to false for ice (but ice still sediments!) + // Need to rationalise this at some point + llfall[1] = false; + + prainfrac_toprfz[jl + klon*ibl] = (double) 0.0; // rain fraction at top of refreezing layer + llrainliq = true; // Assume all raindrops are liquid initially + + //###################################################################### + // 1. *** INITIAL VALUES FOR VARIABLES *** + //###################################################################### + + //----------------------------- + // Reset single level variables + //----------------------------- + + zanewm1 = (double) 0.0; + zda = (double) 0.0; + zcovpclr = (double) 0.0; + zcovpmax = (double) 0.0; + zcovptot = (double) 0.0; + zcldtopdist = (double) 0.0; + //------------- + // zero arrays + //------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[0 + 2*jm] = (double) 0.0; // precip fluxes + zpfplsx[1 + 2*jm] = (double) 0.0; + } + + // ---------------------- + // non CLV initialization + // ---------------------- + for (jk = 0; jk <= klev + 1 + -1; jk += 1) { + + // Fortran counting is beautiful! + jk_i = (jk + 1) % 2; + jk_ip1 = (jk + 2) % 2; + jk_im1 = (jk) % 2; + + if (1 <= jk + 1 && jk + 1 <= klev) { + ztp1[jk_i] = pt[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_t[jl + klon*(jk + klev*ibl)]; + zqx[4] = pq[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + zqx0[4] = pq[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_q[jl + klon*(jk + klev*ibl)]; + za[jk_i] = pa[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + zaorig = pa[jl + klon*(jk + klev*ibl)] + ptsphy* + tendency_tmp_a[jl + klon*(jk + klev*ibl)]; + + // ------------------------------------- + // initialization for CLV family + // ------------------------------------- + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + zqx[jm] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + ptsphy* + tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + zqx0[jm] = pclv[jl + klon*(jk + klev*(jm + 5*ibl))] + ptsphy* + tendency_tmp_cld[jl + klon*(jk + klev*(jm + 5*ibl))]; + } + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxn2d[jm] = (double) 0.0; // end of timestep values in 2D + zlneg[jm] = (double) 0.0; // negative input check + } + + + // ---------------------------------------------------- + // Tidy up very small cloud cover or total cloud water + // ---------------------------------------------------- + if (zqx[0] + zqx[1] < (*yrecldp).rlmin || za[jk_i] < (*yrecldp) + .ramin) { + + // Evaporate small cloud liquid water amounts + zlneg[0] = zlneg[0] + zqx[0]; + zqadj = zqx[0]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + zqx[4] = zqx[4] + zqx[0]; + zqx[0] = (double) 0.0; + + // Evaporate small cloud ice water amounts + zlneg[1] = zlneg[1] + zqx[1]; + zqadj = zqx[1]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + zqx[4] = zqx[4] + zqx[1]; + zqx[1] = (double) 0.0; + + // Set cloud cover to zero + za[jk_i] = (double) 0.0; + + } + + // --------------------------------- + // Tidy up small CLV variables + // --------------------------------- + //DIR$ IVDEP + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + if (zqx[jm] < (*yrecldp).rlmin) { + zlneg[jm] = zlneg[jm] + zqx[jm]; + zqadj = zqx[jm]*zqtmst; + tendency_loc_q[jl + klon*(jk + klev*ibl)] = + tendency_loc_q[jl + klon*(jk + klev*ibl)] + zqadj; + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralvdcp*zqadj; + } + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = + tendency_loc_t[jl + klon*(jk + klev*ibl)] - ralsdcp*zqadj; + } + zqx[4] = zqx[4] + zqx[jm]; + zqx[jm] = (double) 0.0; + } + } + + // ------------------------------ + // Define saturation values + // ------------------------------ + //---------------------------------------- + // old *diagnostic* mixed phase saturation + //---------------------------------------- + zfoealfa = ((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))); + zfoeewmt = fmin(((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))) / pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsmix = zfoeewmt; + zqsmix = zqsmix / ((double) 1.0 - retv*zqsmix); + + //--------------------------------------------- + // ice saturation T<273K + // liquid water saturation for T>273K + //--------------------------------------------- + zalfa = ((double)(fmax(0.0, copysign(1.0, ztp1[jk_i] - rtt)))); + zfoeew = fmin((zalfa*((double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)))) + + ((double) 1.0 - zalfa)*((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))) / + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zfoeew = fmin((double) 0.5, zfoeew); + zqsice = zfoeew / ((double) 1.0 - retv*zfoeew); + + //---------------------------------- + // liquid water saturation + //---------------------------------- + zfoeeliqt = fmin(((double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)))) / + pap[jl + klon*(jk + klev*ibl)], (double) 0.5); + zqsliq = zfoeeliqt; + zqsliq = zqsliq / ((double) 1.0 - retv*zqsliq); + + // !---------------------------------- + // ! ice water saturation + // !---------------------------------- + // ZFOEEICET(JL,JK)=MIN(FOEEICE(ZTP1(JL,JK))/PAP(JL,JK),0.5_JPRB) + // ZQSICE(JL,JK)=ZFOEEICET(JL,JK) + // ZQSICE(JL,JK)=ZQSICE(JL,JK)/(1.0_JPRB-retv*ZQSICE(JL,JK)) + + + //------------------------------------------ + // Ensure cloud fraction is between 0 and 1 + //------------------------------------------ + za[jk_i] = fmax((double) 0.0, fmin((double) 1.0, za[jk_i])); + + //------------------------------------------------------------------- + // Calculate liq/ice fractions (no longer a diagnostic relationship) + //------------------------------------------------------------------- + zli = zqx[0] + zqx[1]; + if (zli > (*yrecldp).rlmin) { + zliqfrac = zqx[0] / zli; + zicefrac = (double) 1.0 - zliqfrac; + } else { + zliqfrac = (double) 0.0; + zicefrac = (double) 0.0; + } + + //###################################################################### + // 2. *** CONSTANTS AND PARAMETERS *** + //###################################################################### + // Calculate L in updrafts of bl-clouds + // Specify QS, P/PS for tropopause (for c2) + // And initialize variables + //------------------------------------------ + + //--------------------------------- + // Find tropopause level (ZTRPAUS) + //--------------------------------- + //ZTRPAUS = 0.1_JPRB + //ZPAPHD = 1.0_JPRB / PAPH(JL, KLEV + 1, IBL) + //DO JK=1,KLEV - 1 + // ZSIG = PAP(JL, JK, IBL)*ZPAPHD + // IF (ZSIG > 0.1_JPRB .and. ZSIG < 0.4_JPRB .and. ZTP1(JK_I) > ZTP1(JL, JK + 1, IBL)) THEN + // ZTRPAUS = ZSIG + // END IF + //END DO + + //----------------------------- + // Reset single level variables + //----------------------------- + + //ZANEWM1 = 0.0_JPRB + //ZDA = 0.0_JPRB + //ZCOVPCLR = 0.0_JPRB + //ZCOVPMAX = 0.0_JPRB + //ZCOVPTOT = 0.0_JPRB + //ZCLDTOPDIST = 0.0_JPRB + + //###################################################################### + // 3. *** PHYSICS *** + //###################################################################### + + + //---------------------------------------------------------------------- + // START OF VERTICAL LOOP + //---------------------------------------------------------------------- + + if ((*yrecldp).ncldtop <= jk + 1 && jk + 1 <= klev) { + + //---------------------------------------------------------------------- + // 3.0 INITIALIZE VARIABLES + //---------------------------------------------------------------------- + + //--------------------------------- + // First guess microphysics + //--------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxfg[jm] = zqx[jm]; + } + + //--------------------------------- + // Set KLON arrays to zero + //--------------------------------- + + zlicld = (double) 0.0; + zrainaut = (double) 0.0; // currently needed for diags + zrainacc = (double) 0.0; // currently needed for diags + zsnowaut = (double) 0.0; // needed + zldefr = (double) 0.0; + zacust = (double) 0.0; // set later when needed + zqpretot = (double) 0.0; + zlfinalsum = (double) 0.0; + + // Required for first guess call + zlcond1 = (double) 0.0; + zlcond2 = (double) 0.0; + zsupsat = (double) 0.0; + zlevapl = (double) 0.0; + zlevapi = (double) 0.0; + + //------------------------------------- + // solvers for cloud fraction + //------------------------------------- + zsolab = (double) 0.0; + zsolac = (double) 0.0; + + zicetot = (double) 0.0; + + //------------------------------------------ + // reset matrix so missing pathways are set + //------------------------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsolqb[jn + 5*jm] = (double) 0.0; + zsolqa[jn + 5*jm] = (double) 0.0; + } + } + + //---------------------------------- + // reset new microphysics variables + //---------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zfallsrce[jm] = (double) 0.0; + zfallsink[jm] = (double) 0.0; + zconvsrce[jm] = (double) 0.0; + zconvsink[jm] = (double) 0.0; + zpsupsatsrce[jm] = (double) 0.0; + zratio[jm] = (double) 0.0; + } + + + //------------------------- + // derived variables needed + //------------------------- + + zdp = paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - paph[jl + klon*(jk + (klev + + 1)*ibl)]; // dp + zgdp = rg / zdp; // g/dp + zrho = pap[jl + klon*(jk + klev*ibl)] / (rd*ztp1[jk_i]); // p/RT air density + + zdtgdp = ptsphy*zgdp; // dt g/dp + zrdtgdp = zdp*((double) 1.0 / (ptsphy*rg)); // 1/(dt g/dp) + + if (jk + 1 > 1) { + zdtgdpf = ptsphy*rg / (pap[jl + klon*(jk + klev*ibl)] - pap[jl + klon*(-1 + + jk + klev*ibl)]); + } + + //------------------------------------ + // Calculate dqs/dT correction factor + //------------------------------------ + // Reminder: retv=rv/rd-1 + + // liquid + zfacw = r5les / (pow((ztp1[jk_i] - r4les), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeeliqt); + zdqsliqdt = zfacw*zcor*zqsliq; + zcorqsliq = (double) 1.0 + ralvdcp*zdqsliqdt; + + // ice + zfaci = r5ies / (pow((ztp1[jk_i] - r4ies), 2)); + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeew); + zdqsicedt = zfaci*zcor*zqsice; + zcorqsice = (double) 1.0 + ralsdcp*zdqsicedt; + + // diagnostic mixed + zalfaw = zfoealfa; + zalfawm = zalfaw; + zfac = zalfaw*zfacw + ((double) 1.0 - zalfaw)*zfaci; + zcor = (double) 1.0 / ((double) 1.0 - retv*zfoeewmt); + zdqsmixdt = zfac*zcor*zqsmix; + zcorqsmix = (double) 1.0 + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + ralvdcp + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zdqsmixdt; + + // evaporation/sublimation limits + zevaplimmix = fmax((zqsmix - zqx[4]) / zcorqsmix, (double) 0.0); + zevaplimliq = fmax((zqsliq - zqx[4]) / zcorqsliq, (double) 0.0); + zevaplimice = fmax((zqsice - zqx[4]) / zcorqsice, (double) 0.0); + + //-------------------------------- + // in-cloud consensate amount + //-------------------------------- + ztmpa = (double) 1.0 / fmax(za[jk_i], zepsec); + zliqcld = zqx[0]*ztmpa; + zicecld = zqx[1]*ztmpa; + zlicld = zliqcld + zicecld; + + + //------------------------------------------------ + // Evaporate very small amounts of liquid and ice + //------------------------------------------------ + + if (zqx[0] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(0)] = zqx[0]; + zsolqa[0 + 5*(4)] = -zqx[0]; + } + + if (zqx[1] < (*yrecldp).rlmin) { + zsolqa[4 + 5*(1)] = zqx[1]; + zsolqa[1 + 5*(4)] = -zqx[1]; + } + + + //--------------------------------------------------------------------- + // 3.1 ICE SUPERSATURATION ADJUSTMENT + //--------------------------------------------------------------------- + // Note that the supersaturation adjustment is made with respect to + // liquid saturation: when T>0C + // ice saturation: when T<0C + // with an adjustment made to allow for ice + // supersaturation in the clear sky + // Note also that the KOOP factor automatically clips the supersaturation + // to a maximum set by the liquid water saturation mixing ratio + // important for temperatures near to but below 0C + //----------------------------------------------------------------------- + + //DIR$ NOFUSION + + //----------------------------------- + // 3.1.1 Supersaturation limit (from Koop) + //----------------------------------- + // Needs to be set for all temperatures + zfokoop = ((double)(fmin(rkoop1 - rkoop2*ztp1[jk_i], (double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/ + (ztp1[jk_i] - r4les)))*1.0/(double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies)))))); + + if (ztp1[jk_i] >= rtt || (*yrecldp).nssopt == 0) { + zfac = (double) 1.0; + zfaci = (double) 1.0; + } else { + zfac = za[jk_i] + zfokoop*((double) 1.0 - za[jk_i]); + zfaci = ptsphy / (*yrecldp).rkooptau; + } + + //------------------------------------------------------------------- + // 3.1.2 Calculate supersaturation wrt Koop including dqs/dT + // correction factor + // [#Note: QSICE or QSLIQ] + //------------------------------------------------------------------- + + // Calculate supersaturation to add to cloud + if (za[jk_i] > (double) 1.0 - (*yrecldp).ramin) { + zsupsat = fmax((zqx[4] - zfac*zqsice) / zcorqsice, (double) 0.0); + } else { + // Calculate environmental humidity supersaturation + zqp1env = (zqx[4] - za[jk_i]*zqsice) / fmax((double) 1.0 - za[jk_i], zepsilon); + //& SIGN(MAX(ABS(1.0_JPRB-ZA(JL,JK)),ZEPSILON),1.0_JPRB-ZA(JL,JK)) + zsupsat = fmax(((double) 1.0 - za[jk_i])*(zqp1env - zfac*zqsice) / + zcorqsice, (double) 0.0); + } + + //------------------------------------------------------------------- + // Here the supersaturation is turned into liquid water + // However, if the temperature is below the threshold for homogeneous + // freezing then the supersaturation is turned instantly to ice. + //-------------------------------------------------------------------- + + if (zsupsat > zepsec) { + + if (ztp1[jk_i] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zsupsat; + // Include liquid in first guess + zqxfg[0] = zqxfg[0] + zsupsat; + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zsupsat; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zsupsat; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + zsupsat; + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk_i])*zfaci; + + } + + //------------------------------------------------------- + // 3.1.3 Include supersaturation from previous timestep + // (Calculated in sltENDIF semi-lagrangian LDSLPHY=T) + //------------------------------------------------------- + if (psupsat[jl + klon*(jk + klev*ibl)] > zepsec) { + if (ztp1[jk_i] > (*yrecldp).rthomo) { + // Turn supersaturation into liquid water + zsolqa[0 + 5*(0)] = + zsolqa[0 + 5*(0)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[0] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add liquid to first guess for deposition term + zqxfg[0] = zqxfg[0] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } else { + // Turn supersaturation into ice water + zsolqa[1 + 5*(1)] = + zsolqa[1 + 5*(1)] + psupsat[jl + klon*(jk + klev*ibl)]; + zpsupsatsrce[1] = psupsat[jl + klon*(jk + klev*ibl)]; + // Add ice to first guess for deposition term + zqxfg[1] = zqxfg[1] + psupsat[jl + klon*(jk + klev*ibl)]; + // Store cloud budget diagnostics if required + } + + // Increase cloud amount using RKOOPTAU timescale + zsolac = ((double) 1.0 - za[jk_i])*zfaci; + // Store cloud budget diagnostics if required + } + + // on JL + + //--------------------------------------------------------------------- + // 3.2 DETRAINMENT FROM CONVECTION + //--------------------------------------------------------------------- + // * Diagnostic T-ice/liq split retained for convection + // Note: This link is now flexible and a future convection + // scheme can detrain explicit seperate budgets of: + // cloud water, ice, rain and snow + // * There is no (1-ZA) multiplier term on the cloud detrainment + // term, since is now written in mass-flux terms + // [#Note: Should use ZFOEALFACU used in convection rather than ZFOEALFA] + //--------------------------------------------------------------------- + if (jk + 1 < klev && jk + 1 >= (*yrecldp).ncldtop) { + + + plude[jl + klon*(jk + klev*ibl)] = plude[jl + klon*(jk + klev*ibl)]*zdtgdp; + + if (/*ldcum[jl + klon*ibl] &&*/ plude[jl + klon*(jk + klev*ibl)] > (*yrecldp) + .rlmin && plu[jl + klon*(1 + jk + klev*ibl)] > zepsec) { + + zsolac = zsolac + plude[jl + klon*(jk + klev*ibl)] / plu[jl + klon*(1 + jk + + klev*ibl)]; + // *diagnostic temperature split* + zalfaw = zfoealfa; + zconvsrce[0] = zalfaw*plude[jl + klon*(jk + klev*ibl)]; + zconvsrce[1] = + ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)]; + zsolqa[0 + 5*(0)] = + zsolqa[0 + 5*(0)] + zconvsrce[0]; + zsolqa[1 + 5*(1)] = + zsolqa[1 + 5*(1)] + zconvsrce[1]; + + } else { + + plude[jl + klon*(jk + klev*ibl)] = (double) 0.0; + + } + // *convective snow detrainment source + //if (ldcum[jl + klon*ibl]) { + zsolqa[3 + 5*(3)] = + zsolqa[3 + 5*(3)] + psnde[jl + klon*(jk + klev*ibl)]*zdtgdp; + //} + + + } + // JK (*yrecldp).ncldtop) { + + zmf = fmax((double) 0.0, (pmfu[jl + klon*(jk + klev*ibl)] + pmfd[jl + + klon*(jk + klev*ibl)])*zdtgdp); + zacust = zmf*zanewm1; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlcust[jm] = zmf*zqxnm1[jm]; + // record total flux for enthalpy budget: + zconvsrce[jm] = zconvsrce[jm] + zlcust[jm]; + } + } + + // Now have to work out how much liquid evaporates at arrival point + // since there is no prognostic memory for in-cloud humidity, i.e. + // we always assume cloud is saturated. + + zdtdp = zrdcp*(double) 0.5*(ztp1[jk_im1] + ztp1[jk_i]) / paph[jl + + klon*(jk + (klev + 1)*ibl)]; + zdtforc = zdtdp*(pap[jl + klon*(jk + klev*ibl)] - pap[jl + klon*(-1 + jk + + klev*ibl)]); + //[#Note: Diagnostic mixed phase should be replaced below] + zdqs = zanewm1*zdtforc*zdqsmixdt; + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (!llfall[jm] && iphase[jm] > 0) { + zlfinal = fmax((double) 0.0, zlcust[jm] - zdqs); //lim to zero + // no supersaturation allowed incloud ---V + zevap = fmin((zlcust[jm] - zlfinal), zevaplimmix); + // ZEVAP=0.0_JPRB + zlfinal = zlcust[jm] - zevap; + zlfinalsum = zlfinalsum + zlfinal; // sum + + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zlcust[jm]; // whole sum + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zevap; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zevap; + } + } + + // Reset the cloud contribution if no cloud water survives to this level: + if (zlfinalsum < zepsec) { + zacust = (double) 0.0; + } + zsolac = zsolac + zacust; + + } + // on JK>NCLDTOP + + //--------------------------------------------------------------------- + // Subsidence sink of cloud to the layer below + // (Implicit - re. CFL limit on convective mass flux) + //--------------------------------------------------------------------- + + + if (jk + 1 < klev) { + + zmfdn = fmax((double) 0.0, (pmfu[jl + klon*(1 + jk + klev*ibl)] + pmfd[jl + + klon*(1 + jk + klev*ibl)])*zdtgdp); + + zsolab = zsolab + zmfdn; + zsolqb[0 + 5*(0)] = zsolqb[0 + 5*(0)] + zmfdn; + zsolqb[1 + 5*(1)] = zsolqb[1 + 5*(1)] + zmfdn; + + // Record sink for cloud budget and enthalpy budget diagnostics + zconvsink[0] = zmfdn; + zconvsink[1] = zmfdn; + + } + + + //---------------------------------------------------------------------- + // 3.4 EROSION OF CLOUDS BY TURBULENT MIXING + //---------------------------------------------------------------------- + // NOTE: In default tiedtke scheme this process decreases the cloud + // area but leaves the specific cloud water content + // within clouds unchanged + //---------------------------------------------------------------------- + + // ------------------------------ + // Define turbulent erosion rate + // ------------------------------ + zldifdt = (*yrecldp).rcldiff*ptsphy; //original version + //Increase by factor of 5 for convective points + if (ktype[jl + klon*ibl] > 0 && plude[jl + klon*(jk + klev*ibl)] > zepsec) { + zldifdt = (*yrecldp).rcldiff_convi*zldifdt; + } + + // At the moment, works on mixed RH profile and partitioned ice/liq fraction + // so that it is similar to previous scheme + // Should apply RHw for liquid cloud and RHi for ice cloud separately + if (zli > zepsec) { + // Calculate environmental humidity + // ZQE=(ZQX(JL,JK,NCLDQV)-ZA(JL,JK)*ZQSMIX(JL,JK))/& + // & MAX(ZEPSEC,1.0_JPRB-ZA(JL,JK)) + // ZE=ZLDIFDT(JL)*MAX(ZQSMIX(JL,JK)-ZQE,0.0_JPRB) + ze = zldifdt*fmax(zqsmix - zqx[4], (double) 0.0); + zleros = za[jk_i]*ze; + zleros = fmin(zleros, zevaplimmix); + zleros = fmin(zleros, zli); + zaeros = zleros / zlicld; //if linear term + + // Erosion is -ve LINEAR in L,A + zsolac = zsolac - zaeros; //linear + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac*zleros; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac*zleros; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac*zleros; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac*zleros; + + } + + //---------------------------------------------------------------------- + // 3.4 CONDENSATION/EVAPORATION DUE TO DQSAT/DT + //---------------------------------------------------------------------- + // calculate dqs/dt + // Note: For the separate prognostic Qi and Ql, one would ideally use + // Qsat/DT wrt liquid/Koop here, since the physics is that new clouds + // forms by liquid droplets [liq] or when aqueous aerosols [Koop] form. + // These would then instantaneous freeze if T<-38C or lead to ice growth + // by deposition in warmer mixed phase clouds. However, since we do + // not have a separate prognostic equation for in-cloud humidity or a + // statistical scheme approach in place, the depositional growth of ice + // in the mixed phase can not be modelled and we resort to supersaturation + // wrt ice instanteously converting to ice over one timestep + // (see Tompkins et al. QJRMS 2007 for details) + // Thus for the initial implementation the diagnostic mixed phase is + // retained for the moment, and the level of approximation noted. + //---------------------------------------------------------------------- + + zdtdp = zrdcp*ztp1[jk_i] / pap[jl + klon*(jk + klev*ibl)]; + zdpmxdt = zdp*zqtmst; + zmfdn = (double) 0.0; + if (jk + 1 < klev) { + zmfdn = + pmfu[jl + klon*(1 + jk + klev*ibl)] + pmfd[jl + klon*(1 + jk + klev*ibl)]; + } + zwtot = pvervel[jl + klon*(jk + klev*ibl)] + (double) 0.5*rg*(pmfu[jl + + klon*(jk + klev*ibl)] + pmfd[jl + klon*(jk + klev*ibl)] + zmfdn); + zwtot = fmin(zdpmxdt, fmax(-zdpmxdt, zwtot)); + zzzdt = phrsw[jl + klon*(jk + klev*ibl)] + phrlw[jl + klon*(jk + klev*ibl)]; + zdtdiab = + fmin(zdpmxdt*zdtdp, fmax(-zdpmxdt*zdtdp, zzzdt))*ptsphy + ralfdcp*zldefr; + // Note: ZLDEFR should be set to the difference between the mixed phase functions + // in the convection and cloud scheme, but this is not calculated, so is zero and + // the functions must be the same + zdtforc = zdtdp*zwtot*ptsphy + zdtdiab; + zqold = zqsmix; + ztold = ztp1[jk_i]; + ztp1[jk_i] = ztp1[jk_i] + zdtforc; + ztp1[jk_i] = fmax(ztp1[jk_i], (double) 160.0); + llflag = true; + + // Formerly a call to CUADJTQ(..., ICALL=5) + zqp = (double) 1.0 / pap[jl + klon*(jk + klev*ibl)]; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond = (zqsmix - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jk_i] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jk_i] - r4ies, 2))))); + ztp1[jk_i] = ztp1[jk_i] + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond; + zqsmix = zqsmix - zcond; + zqsat = ((double)(r2es*((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))* + exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)) + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))* + exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies)))))*zqp; + zqsat = fmin((double) 0.5, zqsat); + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsat); + zqsat = zqsat*zcor; + zcond1 = (zqsmix - zqsat) / ((double) 1.0 + zqsat*zcor*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jk_i] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jk_i] - r4ies, 2))))); + ztp1[jk_i] = ztp1[jk_i] + ((double)((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*ralvdcp + + (1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*ralsdcp))*zcond1; + zqsmix = zqsmix - zcond1; + + zdqs = zqsmix - zqold; + zqsmix = zqold; + ztp1[jk_i] = ztold; + + //---------------------------------------------------------------------- + // 3.4a ZDQS(JL) > 0: EVAPORATION OF CLOUDS + // ---------------------------------------------------------------------- + // Erosion term is LINEAR in L + // Changed to be uniform distribution in cloud region + + + // Previous function based on DELTA DISTRIBUTION in cloud: + if (zdqs > (double) 0.0) { + // If subsidence evaporation term is turned off, then need to use updated + // liquid and cloud here? + // ZLEVAP = MAX(ZA(JL,JK)+ZACUST(JL),1.0_JPRB)*MIN(ZDQS(JL),ZLICLD(JL)+ZLFINALSUM(JL)) + zlevap = za[jk_i]*fmin(zdqs, zlicld); + zlevap = fmin(zlevap, zevaplimmix); + zlevap = fmin(zlevap, fmax(zqsmix - zqx[4], (double) 0.0)); + + // For first guess call + zlevapl = zliqfrac*zlevap; + zlevapi = zicefrac*zlevap; + + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] + zliqfrac*zlevap; + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] - zliqfrac*zlevap; + + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] + zicefrac*zlevap; + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] - zicefrac*zlevap; + + } + + + //---------------------------------------------------------------------- + // 3.4b ZDQS(JL) < 0: FORMATION OF CLOUDS + //---------------------------------------------------------------------- + // (1) Increase of cloud water in existing clouds + if (za[jk_i] > zepsec && zdqs <= -(*yrecldp).rlmin) { + + zlcond1 = fmax(-zdqs, (double) 0.0); //new limiter + + //old limiter (significantly improves upper tropospheric humidity rms) + if (za[jk_i] > (double) 0.99) { + zcor = (double) 1.0 / ((double) 1.0 - retv*zqsmix); + zcdmax = (zqx[4] - zqsmix) / ((double) 1.0 + zcor*zqsmix*((double)(((double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2)))*r5alvcp)* + (1.0/pow(ztp1[jk_i] - r4les, 2)) + ((1.0 - (double)(fmin(1.0, pow((fmax(rtice, fmin(rtwat, ztp1[jk_i])) - rtice)*rtwat_rtice_r, 2))))*r5alscp)* + (1.0/pow(ztp1[jk_i] - r4ies, 2))))); + } else { + zcdmax = (zqx[4] - za[jk_i]*zqsmix) / za[jk_i]; + } + zlcond1 = fmax(fmin(zlcond1, zcdmax), (double) 0.0); + // end old limiter + + zlcond1 = za[jk_i]*zlcond1; + if (zlcond1 < (*yrecldp).rlmin) { + zlcond1 = (double) 0.0; + } + + //------------------------------------------------------------------------- + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------- + if (ztp1[jk_i] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond1; + zqxfg[0] = zqxfg[0] + zlcond1; + } else { + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond1; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond1; + zqxfg[1] = zqxfg[1] + zlcond1; + } + } + + // (2) Generation of new clouds (da/dt>0) + + + if (zdqs <= -(*yrecldp).rlmin && za[jk_i] < (double) 1.0 - zepsec) { + + //--------------------------- + // Critical relative humidity + //--------------------------- + zrhc = (*yrecldp).ramid; + zsigk = + pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + 1)*ibl)]; + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + if (zsigk > (double) 0.8) { + zrhc = (*yrecldp).ramid + ((double) 1.0 - (*yrecldp).ramid)*(pow(((zsigk - + (double) 0.8) / (double) 0.2), 2)); + } + + // Commented out for CY37R1 to reduce humidity in high trop and strat + // ! Increase RHcrit to 1.0 towards the tropopause (trop-0.2) and above + // ZBOTT=ZTRPAUS(JL)+0.2_JPRB + // IF(ZSIGK < ZBOTT) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*MIN(((ZBOTT-ZSIGK)/0.2_JPRB)**2,1.0_JPRB) + // ENDIF + + //--------------------------- + // Supersaturation options + //--------------------------- + if ((*yrecldp).nssopt == 0) { + // No scheme + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 1) { + // Tompkins + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + zqe = fmax((double) 0.0, zqe); + } else if ((*yrecldp).nssopt == 2) { + // Lohmann and Karcher + zqe = zqx[4]; + } else if ((*yrecldp).nssopt == 3) { + // Gierens + zqe = zqx[4] + zli; + } + + if (ztp1[jk_i] >= rtt || (*yrecldp).nssopt == 0) { + // No ice supersaturation allowed + zfac = (double) 1.0; + } else { + // Ice supersaturation + zfac = zfokoop; + } + + if (zqe >= zrhc*zqsice*zfac && zqe < zqsice*zfac) { + // note: not **2 on 1-a term if ZQE is used. + // Added correction term ZFAC to numerator 15/03/2010 + zacond = -((double) 1.0 - za[jk_i])*zfac*zdqs / fmax((double) + 2.0*(zfac*zqsice - zqe), zepsec); + + zacond = fmin(zacond, (double) 1.0 - za[jk_i]); //PUT THE LIMITER BACK + + // Linear term: + // Added correction term ZFAC 15/03/2010 + zlcond2 = -zfac*zdqs*(double) 0.5*zacond; //mine linear + + // new limiter formulation + zzdl = (double) 2.0*(zfac*zqsice - zqe) / fmax(zepsec, (double) 1.0 - za[jk_i]); + // Added correction term ZFAC 15/03/2010 + if (zfac*zdqs < -zzdl) { + // ZLCONDLIM=(ZA(JL,JK)-1.0_JPRB)*ZDQS(JL)-ZQSICE(JL,JK)+ZQX(JL,JK,NCLDQV) + zlcondlim = + (za[jk_i] - (double) 1.0)*zfac*zdqs - zfac*zqsice + zqx[4]; + zlcond2 = fmin(zlcond2, zlcondlim); + } + zlcond2 = fmax(zlcond2, (double) 0.0); + + if (zlcond2 < (*yrecldp).rlmin || ((double) 1.0 - za[jk_i]) < zepsec + ) { + zlcond2 = (double) 0.0; + zacond = (double) 0.0; + } + if (zlcond2 == (double) 0.0) { + zacond = (double) 0.0; + } + + // Large-scale generation is LINEAR in A and LINEAR in L + zsolac = zsolac + zacond; //linear + + //------------------------------------------------------------------------ + // All increase goes into liquid unless so cold cloud homogeneously freezes + // Include new liquid formation in first guess value, otherwise liquid + // remains at cold temperatures until next timestep. + //------------------------------------------------------------------------ + if (ztp1[jk_i] > (*yrecldp).rthomo) { + zsolqa[0 + 5*(4)] = zsolqa[0 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(0)] = zsolqa[4 + 5*(0)] - zlcond2; + zqxfg[0] = zqxfg[0] + zlcond2; + } else { + // homogeneous freezing + zsolqa[1 + 5*(4)] = zsolqa[1 + 5*(4)] + zlcond2; + zsolqa[4 + 5*(1)] = zsolqa[4 + 5*(1)] - zlcond2; + zqxfg[1] = zqxfg[1] + zlcond2; + } + + } + } + + //---------------------------------------------------------------------- + // 3.7 Growth of ice by vapour deposition + //---------------------------------------------------------------------- + // Following Rotstayn et al. 2001: + // does not use the ice nuclei number from cloudaer.F90 + // but rather a simple Meyers et al. 1992 form based on the + // supersaturation and assuming clouds are saturated with + // respect to liquid water (well mixed), (or Koop adjustment) + // Growth considered as sink of liquid water if present so + // Bergeron-Findeisen adjustment in autoconversion term no longer needed + //---------------------------------------------------------------------- + + //-------------------------------------------------------- + //- + //- Ice deposition following Rotstayn et al. (2001) + //- (monodisperse ice particle size distribution) + //- + //-------------------------------------------------------- + if (idepice == 1) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*rg) + //-------------------------------------------------------------- + + if (za[jk_im1] < (*yrecldp).rcldtopcf && za[jk_i] >= (*yrecldp) + .rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk_i] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = (((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))*rv) / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq + - (double) 0.639); + + //------------------------------------------------ + // 2.4e-2 is conductivity of air + // 8.8 = 700**1/3 = density of ice to the third + //------------------------------------------------ + zadd = rlstt*(rlstt / (rv*ztp1[jk_i]) - (double) 1.0) / ((double) + 2.4E-2*ztp1[jk_i]); + zbdd = rv*ztp1[jk_i]*pap[jl + klon*(jk + klev*ibl)] / ((double) + 2.21*zvpice); + zcvds = (double) 7.8*(pow((zicenuclei / zrho), (double) 0.666))*(zvpliq - + zvpice) / ((double) 8.87*(zadd + zbdd)*zvpice); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + //------------------ + // new value of ice: + //------------------ + zinew = pow(((double) 0.666*zcvds*ptsphy + (pow(zice0, (double) 0.666))), + (double) 1.5); + + //--------------------------- + // grid-mean deposition rate: + //--------------------------- + zdepos = fmax(za[jk_i]*(zinew - zice0), (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // ZDEPOS = ZDEPOS*MIN(RDEPLIQREFRATE+ZCLDTOPDIST(JL)/RDEPLIQREFDEPTH,1.0_JPRB) + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0 + ); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + + } + + //-------------------------------------------------------- + //- + //- Ice deposition assuming ice PSD + //- + //-------------------------------------------------------- + } else if (idepice == 2) { + + + //-------------------------------------------------------------- + // Calculate distance from cloud top + // defined by cloudy layer below a layer with cloud frac <0.01 + // ZDZ = ZDP(JL)/(ZRHO(JL)*rg) + //-------------------------------------------------------------- + + if (za[jk_im1] < (*yrecldp).rcldtopcf && za[jk_i] >= (*yrecldp) + .rcldtopcf) { + zcldtopdist = (double) 0.0; + } else { + zcldtopdist = zcldtopdist + zdp / (zrho*rg); + } + + //-------------------------------------------------------------- + // only treat depositional growth if liquid present. due to fact + // that can not model ice growth from vapour without additional + // in-cloud water vapour variable + //-------------------------------------------------------------- + if (ztp1[jk_i] < rtt && zqxfg[0] > (*yrecldp).rlmin) { + // T<273K + + zvpice = (((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))*rv) / rd; + zvpliq = zvpice*zfokoop; + zicenuclei = (double) 1000.0*exp((double) 12.96*(zvpliq - zvpice) / zvpliq + - (double) 0.639); + + //----------------------------------------------------- + // RICEINIT=1.E-12_JPRB is initial mass of ice particle + //----------------------------------------------------- + zice0 = fmax(zicecld, zicenuclei*(*yrecldp).riceinit / zrho); + + // Particle size distribution + ztcg = (double) 1.0; + zfacx1i = (double) 1.0; + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk_i] + + pap[jl + klon*(jk + klev*ibl)]*(*yrecldp).rcl_apb3*(pow(ztp1[jk_i], (double) 3.)); + zcorrfac = pow(((double) 1.0 / zrho), (double) 0.5); + zcorrfac2 = (pow((ztp1[jk_i] / (double) 273.0), (double) 1.5)) + *((double) 393.0 / (ztp1[jk_i] + (double) 120.0)); + + zpr02 = zrho*zice0*(*yrecldp).rcl_const1i / (ztcg*zfacx1i); + + zterm1 = (zvpliq - zvpice)*(pow(ztp1[jk_i], (double) 2.0)) + *zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2i*zfacx1i / + (zrho*zaplusb*zvpice); + zterm2 = (double) 0.65*(*yrecldp).rcl_const6i*(pow(zpr02, (*yrecldp) + .rcl_const4i)) + (*yrecldp).rcl_const3i*(pow(zcorrfac, (double) 0.5)) + *(pow(zrho, (double) 0.5))*(pow(zpr02, (*yrecldp).rcl_const5i)) / + (pow(zcorrfac2, (double) 0.5)); + + zdepos = fmax(za[jk_i]*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit deposition to liquid water amount + // If liquid is all frozen, ice would use up reservoir of water + // vapour in excess of ice saturation mixing ratio - However this + // can not be represented without a in-cloud humidity variable. Using + // the grid-mean humidity would imply a large artificial horizontal + // flux from the clear sky to the cloudy area. We thus rely on the + // supersaturation check to clean up any remaining supersaturation + //-------------------------------------------------------------------- + zdepos = fmin(zdepos, zqxfg[0]); // limit to liquid water amount + + //-------------------------------------------------------------------- + // At top of cloud, reduce deposition rate near cloud top to account for + // small scale turbulent processes, limited ice nucleation and ice fallout + //-------------------------------------------------------------------- + // Change to include dependence on ice nuclei concentration + // to increase deposition rate with decreasing temperatures + zinfactor = fmin(zicenuclei / (double) 15000., (double) 1.0); + zdepos = zdepos*fmin(zinfactor + ((double) 1.0 - zinfactor)*((*yrecldp) + .rdepliqrefrate + zcldtopdist / (*yrecldp).rdepliqrefdepth), (double) 1.0 + ); + + //-------------- + // add to matrix + //-------------- + zsolqa[1 + 5*(0)] = zsolqa[1 + 5*(0)] + zdepos; + zsolqa[0 + 5*(1)] = zsolqa[0 + 5*(1)] - zdepos; + zqxfg[1] = zqxfg[1] + zdepos; + zqxfg[0] = zqxfg[0] - zdepos; + } + + } + // on IDEPICE + + //###################################################################### + // 4 *** PRECIPITATION PROCESSES *** + //###################################################################### + + //---------------------------------- + // revise in-cloud consensate amount + //---------------------------------- + ztmpa = (double) 1.0 / fmax(za[jk_i], zepsec); + zliqcld = zqxfg[0]*ztmpa; + zicecld = zqxfg[1]*ztmpa; + zlicld = zliqcld + zicecld; + + //---------------------------------------------------------------------- + // 4.2 SEDIMENTATION/FALLING OF *ALL* MICROPHYSICAL SPECIES + // now that rain, snow, graupel species are prognostic + // the precipitation flux can be defined directly level by level + // There is no vertical memory required from the flux variable + //---------------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm] || jm + 1 == 2) { + //------------------------ + // source from layer above + //------------------------ + if (jk + 1 > (*yrecldp).ncldtop) { + zfallsrce[jm] = zpfplsx[jk_i + 2*jm]*zdtgdp; + zsolqa[jm + 5*jm] = zsolqa[jm + 5*jm] + zfallsrce[jm]; + zqxfg[jm] = zqxfg[jm] + zfallsrce[jm]; + // use first guess precip----------V + zqpretot = zqpretot + zqxfg[jm]; + } + //------------------------------------------------- + // sink to next layer, constant fall speed + //------------------------------------------------- + // if aerosol effect then override + // note that for T>233K this is the same as above. + if ((*yrecldp).laericesed && jm + 1 == 2) { + zre_ice = pre_ice[jl + klon*(jk + klev*ibl)]; + // The exponent value is from + // Morrison et al. JAS 2005 Appendix + zvqx[1] = (double) 0.002*(pow(zre_ice, (double) 1.0)); + } + zfall = zvqx[jm]*zrho; + //------------------------------------------------- + // modified by Heymsfield and Iaquinta JAS 2000 + //------------------------------------------------- + // ZFALL = ZFALL*((PAP(JL,JK)*RICEHI1)**(-0.178_JPRB)) & + // &*((ZTP1(JL,JK)*RICEHI2)**(-0.394_JPRB)) + + zfallsink[jm] = zdtgdp*zfall; + // Cloud budget diagnostic stored at end as implicit + // jl + } + // LLFALL + } + // jm + + //--------------------------------------------------------------- + // Precip cover overlap using MAX-RAN Overlap + // Since precipitation is now prognostic we must + // 1) apply an arbitrary minimum coverage (0.3) if precip>0 + // 2) abandon the 2-flux clr/cld treatment + // 3) Thus, since we have no memory of the clear sky precip + // fraction, we mimic the previous method by reducing + // ZCOVPTOT(JL), which has the memory, proportionally with + // the precip evaporation rate, taking cloud fraction + // into account + // #3 above leads to much smoother vertical profiles of + // precipitation fraction than the Klein-Jakob scheme which + // monotonically increases precip fraction and then resets + // it to zero in a step function once clear-sky precip reaches + // zero. + //--------------------------------------------------------------- + if (zqpretot > zepsec) { + zcovptot = (double) 1.0 - (((double) 1.0 - zcovptot)*((double) 1.0 - + fmax(za[jk_i], za[jk_im1]))) / ((double) 1.0 - fmin(za[jk_im1], (double) 1.0 - (double) 1.E-06)); // here!!! + zcovptot = fmax(zcovptot, (*yrecldp).rcovpmin); + zcovpclr = fmax((double) 0.0, zcovptot - za[jk_i]); // clear sky proportion + zraincld = zqxfg[2] / zcovptot; + zsnowcld = zqxfg[3] / zcovptot; + zcovpmax = fmax(zcovptot, zcovpmax); + } else { + zraincld = (double) 0.0; + zsnowcld = (double) 0.0; + zcovptot = (double) 0.0; // no flux - reset cover + zcovpclr = (double) 0.0; // reset clear sky proportion + zcovpmax = (double) 0.0; // reset max cover for ZZRH calc + } + + //---------------------------------------------------------------------- + // 4.3a AUTOCONVERSION TO SNOW + //---------------------------------------------------------------------- + + if (ztp1[jk_i] <= rtt) { + //----------------------------------------------------- + // Snow Autoconversion rate follow Lin et al. 1983 + //----------------------------------------------------- + if (zicecld > zepsec) { + + zzco = ptsphy*(*yrecldp).rsnowlin1*exp((*yrecldp).rsnowlin2*(ztp1[jk_i] - rtt)); + + if ((*yrecldp).laericeauto) { + zlcrit = picrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=0.027 + zzco = zzco*(pow(((*yrecldp).rnice / pnice[jl + klon*(jk + klev*ibl)]), + (double) 0.333)); + } else { + zlcrit = (*yrecldp).rlcritsnow; + } + + zsnowaut = zzco*((double) 1.0 - exp(-(pow((zicecld / zlcrit), 2)))); + zsolqb[3 + 5*(1)] = zsolqb[3 + 5*(1)] + zsnowaut; + + } + } + + //---------------------------------------------------------------------- + // 4.3b AUTOCONVERSION WARM CLOUDS + // Collection and accretion will require separate treatment + // but for now we keep this simple treatment + //---------------------------------------------------------------------- + + if (zliqcld > zepsec) { + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Sundqvist (1989) + //- + //-------------------------------------------------------- + if (iwarmrain == 1) { + + zzco = (*yrecldp).rkconv*ptsphy; + + if ((*yrecldp).laerliqautolsp) { + zlcrit = plcrit_aer[jl + klon*(jk + klev*ibl)]; + // 0.3 = N**0.333 with N=125 cm-3 + zzco = zzco*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), + (double) 0.333)); + } else { + // Modify autoconversion threshold dependent on: + // land (polluted, high CCN, smaller droplets, higher threshold) + // sea (clean, low CCN, larger droplets, lower threshold) + if (plsm[jl + klon*ibl] > (double) 0.5) { + zlcrit = (*yrecldp).rclcrit_land; // land + } else { + zlcrit = (*yrecldp).rclcrit_sea; // ocean + } + } + + //------------------------------------------------------------------ + // Parameters for cloud collection by rain and snow. + // Note that with new prognostic variable it is now possible + // to REPLACE this with an explicit collection parametrization + //------------------------------------------------------------------ + zprecip = (zpfplsx[jk_i + 2*(3)] + zpfplsx[jk_i + 2*(2) + ]) / fmax(zepsec, zcovptot); + zcfpr = (double) 1.0 + (*yrecldp).rprc1*sqrt(fmax(zprecip, (double) 0.0)); + // ZCFPR=1.0_JPRB + RPRC1*SQRT(MAX(ZPRECIP,0.0_JPRB))*& + // &ZCOVPTOT(JL)/(MAX(ZA(JL,JK),ZEPSEC)) + + if ((*yrecldp).laerliqcoll) { + // 5.0 = N**0.333 with N=125 cm-3 + zcfpr = zcfpr*(pow(((*yrecldp).rccn / pccn[jl + klon*(jk + klev*ibl)]), + (double) 0.333)); + } + + zzco = zzco*zcfpr; + zlcrit = zlcrit / fmax(zcfpr, zepsec); + + if (zliqcld / zlcrit < (double) 20.0) { + // Security for exp for some compilers + zrainaut = zzco*((double) 1.0 - exp(-(pow((zliqcld / zlcrit), 2)))); + } else { + zrainaut = zzco; + } + + // rain freezes instantly + if (ztp1[jk_i] <= rtt) { + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zrainaut; + } else { + zsolqb[2 + 5*(0)] = zsolqb[2 + 5*(0)] + zrainaut; + } + + //-------------------------------------------------------- + //- + //- Warm-rain process follow Khairoutdinov and Kogan (2000) + //- + //-------------------------------------------------------- + } else if (iwarmrain == 2) { + + if (plsm[jl + klon*ibl] > (double) 0.5) { + // land + zconst = (*yrecldp).rcl_kk_cloud_num_land; + zlcrit = (*yrecldp).rclcrit_land; + } else { + // ocean + zconst = (*yrecldp).rcl_kk_cloud_num_sea; + zlcrit = (*yrecldp).rclcrit_sea; + } + + if (zliqcld > zlcrit) { + + zrainaut = (double) 1.5*za[jk_i]*ptsphy*(*yrecldp) + .rcl_kkaau*(pow(zliqcld, (*yrecldp).rcl_kkbauq))*(pow(zconst, (*yrecldp + ).rcl_kkbaun)); + + zrainaut = fmin(zrainaut, zqxfg[0]); + if (zrainaut < zepsec) { + zrainaut = (double) 0.0; + } + + zrainacc = (double) 2.0*za[jk_i]*ptsphy*(*yrecldp) + .rcl_kkaac*(pow((zliqcld*zraincld), (*yrecldp).rcl_kkbac)); + + zrainacc = fmin(zrainacc, zqxfg[0]); + if (zrainacc < zepsec) { + zrainacc = (double) 0.0; + } + + } else { + zrainaut = (double) 0.0; + zrainacc = (double) 0.0; + } + + // If temperature < 0, then autoconversion produces snow rather than rain + // Explicit + if (ztp1[jk_i] <= rtt) { + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainaut; + zsolqa[3 + 5*(0)] = zsolqa[3 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainaut; + zsolqa[0 + 5*(3)] = zsolqa[0 + 5*(3)] - zrainacc; + } else { + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainaut; + zsolqa[2 + 5*(0)] = zsolqa[2 + 5*(0)] + zrainacc; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainaut; + zsolqa[0 + 5*(2)] = zsolqa[0 + 5*(2)] - zrainacc; + } + + } + // on IWARMRAIN + + } + // on ZLIQCLD > ZEPSEC + + + //---------------------------------------------------------------------- + // RIMING - COLLECTION OF CLOUD LIQUID DROPS BY SNOW AND ICE + // only active if T<0degC and supercooled liquid water is present + // AND if not Sundquist autoconversion (as this includes riming) + //---------------------------------------------------------------------- + if (iwarmrain > 1) { + + if (ztp1[jk_i] <= rtt && zliqcld > zepsec) { + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), (double) 0.4); + + //------------------------------------------------------------------ + // Riming of snow by cloud water - implicit in lwc + //------------------------------------------------------------------ + if (zsnowcld > zepsec && zcovptot > (double) 0.01) { + + // Calculate riming term + // Factor of liq water taken out because implicit + zsnowrime = (double) 0.3*zcovptot*ptsphy*(*yrecldp) + .rcl_const7s*zfallcorr*(pow((zrho*zsnowcld*(*yrecldp).rcl_const1s), + (*yrecldp).rcl_const8s)); + + // Limit snow riming term + zsnowrime = fmin(zsnowrime, (double) 1.0); + + zsolqb[3 + 5*(0)] = zsolqb[3 + 5*(0)] + zsnowrime; + + } + + //------------------------------------------------------------------ + // Riming of ice by cloud water - implicit in lwc + // NOT YET ACTIVE + //------------------------------------------------------------------ + // IF (ZICECLD(JL)>ZEPSEC .AND. ZA(JL,JK)>0.01_JPRB) THEN + // + // ! Calculate riming term + // ! Factor of liq water taken out because implicit + // ZSNOWRIME(JL) = ZA(JL,JK)*PTSPHY*RCL_CONST7S*ZFALLCORR & + // & *(ZRHO(JL)*ZICECLD(JL)*RCL_CONST1S)**RCL_CONST8S + // + // ! Limit ice riming term + // ZSNOWRIME(JL)=MIN(ZSNOWRIME(JL),1.0_JPRB) + // + // ZSOLQB(JL,NCLDQI,NCLDQL) = ZSOLQB(JL,NCLDQI,NCLDQL) + ZSNOWRIME(JL) + // + // ENDIF + } + + } + // on IWARMRAIN > 1 + + + //---------------------------------------------------------------------- + // 4.4a MELTING OF SNOW and ICE + // with new implicit solver this also has to treat snow or ice + // precipitating from the level above... i.e. local ice AND flux. + // in situ ice and snow: could arise from LS advection or warming + // falling ice and snow: arrives by precipitation process + //---------------------------------------------------------------------- + + zicetot = zqxfg[1] + zqxfg[3]; + zmeltmax = (double) 0.0; + + // If there are frozen hydrometeors present and dry-bulb temperature > 0degC + if (zicetot > zepsec && ztp1[jk_i] > rtt) { + + // Calculate subsaturation + zsubsat = fmax(zqsice - zqx[4], (double) 0.0); + + // Calculate difference between dry-bulb (ZTP1) and the temperature + // at which the wet-bulb=0degC (rtt-ZSUBSAT*....) using an approx. + // Melting only occurs if the wet-bulb temperature >0 + // i.e. warming of ice particle due to melting > cooling + // due to evaporation. + ztdmtw0 = ztp1[jk_i] - rtt - zsubsat*(ztw1 + ztw2*(pap[jl + klon*(jk + + klev*ibl)] - ztw3) - ztw4*(ztp1[jk_i] - ztw5)); + // Not implicit yet... + // Ensure ZCONS1 is positive so that ZMELTMAX=0 if ZTDMTW0<0 + zcons1 = + fabs(ptsphy*((double) 1.0 + (double) 0.5*ztdmtw0) / (*yrecldp).rtaumel); + zmeltmax = fmax(ztdmtw0*zcons1*zrldcp, (double) 0.0); + } + + // Loop over frozen hydrometeors (ice, snow) + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (iphase[jm] == 2) { + jn = imelt[jm]; + if (zmeltmax > zepsec && zicetot > zepsec) { + // Apply melting in same proportion as frozen hydrometeor fractions + zalfa = zqxfg[jm] / zicetot; + zmelt = fmin(zqxfg[jm], zalfa*zmeltmax); + // needed in first guess + // This implies that zqpretot has to be recalculated below + // since is not conserved here if ice falls and liquid doesn't + zqxfg[jm] = zqxfg[jm] - zmelt; + zqxfg[-1 + jn] = zqxfg[-1 + jn] + zmelt; + zsolqa[-1 + jn + 5*jm] = zsolqa[-1 + jn + 5*jm] + zmelt; + zsolqa[jm + 5*(-1 + jn)] = zsolqa[jm + 5*(-1 + jn)] - zmelt; + } + } + } + + //---------------------------------------------------------------------- + // 4.4b FREEZING of RAIN + //---------------------------------------------------------------------- + + // If rain present + if (zqx[2] > zepsec) { + + if (ztp1[jk_i] <= rtt && ztp1[jk_im1] > rtt) { + // Base of melting layer/top of refreezing layer so + // store rain/snow fraction for precip type diagnosis + // If mostly rain, then supercooled rain slow to freeze + // otherwise faster to freeze (snow or ice pellets) + zqpretot = fmax(zqx[3] + zqx[2], zepsec); + prainfrac_toprfz[jl + klon*ibl] = zqx[2] / zqpretot; + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + llrainliq = true; + } else { + llrainliq = false; + } + } + + // If temperature less than zero + if (ztp1[jk_i] < rtt) { + + if (prainfrac_toprfz[jl + klon*ibl] > 0.8) { + + // Majority of raindrops completely melted + // Refreezing is by slow heterogeneous freezing + + // Slope of rain particle size distribution + zlambda = + pow(((*yrecldp).rcl_fac1 / (zrho*zqx[2])), (*yrecldp).rcl_fac2); + + // Calculate freezing rate based on Bigg(1953) and Wisner(1972) + ztemp = (*yrecldp).rcl_fzrab*(ztp1[jk_i] - rtt); + zfrz = ptsphy*((*yrecldp).rcl_const5r / zrho)*(exp(ztemp) - (double) 1.) + *(pow(zlambda, (*yrecldp).rcl_const6r)); + zfrzmax = fmax(zfrz, (double) 0.0); + + } else { + + // Majority of raindrops only partially melted + // Refreeze with a shorter timescale (reverse of melting...for now) + + zcons1 = fabs(ptsphy*((double) 1.0 + (double) 0.5*(rtt - ztp1[jk_i]) + ) / (*yrecldp).rtaumel); + zfrzmax = fmax((rtt - ztp1[jk_i])*zcons1*zrldcp, (double) 0.0); + + } + + if (zfrzmax > zepsec) { + zfrz = fmin(zqx[2], zfrzmax); + zsolqa[3 + 5*(2)] = zsolqa[3 + 5*(2)] + zfrz; + zsolqa[2 + 5*(3)] = zsolqa[2 + 5*(3)] - zfrz; + } + } + + } + + + //---------------------------------------------------------------------- + // 4.4c FREEZING of LIQUID + //---------------------------------------------------------------------- + // not implicit yet... + zfrzmax = fmax(((*yrecldp).rthomo - ztp1[jk_i])*zrldcp, (double) 0.0); + + jm = 1; + jn = imelt[-1 + jm]; + if (zfrzmax > zepsec && zqxfg[-1 + jm] > zepsec) { + zfrz = fmin(zqxfg[-1 + jm], zfrzmax); + zsolqa[-1 + jn + 5*(-1 + jm)] = zsolqa[-1 + jn + 5*(-1 + jm)] + zfrz; + zsolqa[-1 + jm + 5*(-1 + jn)] = zsolqa[-1 + jm + 5*(-1 + jn)] - zfrz; + } + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF RAIN/SNOW + //---------------------------------------------------------------------- + + //---------------------------------------- + // Rain evaporation scheme from Sundquist + //---------------------------------------- + if (ievaprain == 1) { + + // Rain + + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + zqe = (zqx[4] - za[jk_i]*zqsliq) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsliq)); + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq; + + if (llo1) { + // note: zpreclr is a rain flux + zpreclr = zqxfg[2]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + + 1)*ibl)]) / (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(double) 0.5*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsliq; + zdpr = zcovpclr*zbeta*(zqsliq - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Evaporate rain + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + + //--------------------------------------------------------- + // Rain evaporation scheme based on Abel and Boutle (2013) + //--------------------------------------------------------- + } else if (ievaprain == 2) { + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for rain evaporation + // to avoid cloud formation and saturation of the grid box + //----------------------------------------------------------------------- + // Limit RH for rain evaporation dependent on precipitation fraction + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + + // Critical relative humidity + //ZRHC=RAMID + //ZSIGK=PAP(JL,JK)/PAPH(JL,KLEV+1) + // Increase RHcrit to 1.0 towards the surface (eta>0.8) + //IF(ZSIGK > 0.8_JPRB) THEN + // ZRHC=RAMID+(1.0_JPRB-RAMID)*((ZSIGK-0.8_JPRB)/0.2_JPRB)**2 + //ENDIF + //ZZRH = MIN(ZRHC,ZZRH) + + // Further limit RH for rain evaporation to 80% (RHcrit in free troposphere) + zzrh = fmin((double) 0.8, zzrh); + + zqe = fmax((double) 0.0, fmin(zqx[4], zqsliq)); + + llo1 = zcovpclr > zepsec && zqxfg[2] > zepsec && zqe < zzrh*zqsliq; + + if (llo1) { + + //------------------------------------------- + // Abel and Boutle (2012) evaporation + //------------------------------------------- + // Calculate local precipitation (kg/kg) + zpreclr = zqxfg[2] / zcovptot; + + // Fallspeed air density correction + zfallcorr = pow(((*yrecldp).rdensref / zrho), 0.4); + + // Saturation vapour pressure with respect to liquid phase + zesatliq = rv / rd*((double)(r2es*exp((r3les*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4les)))); + + // Slope of particle size distribution + zlambda = pow(((*yrecldp).rcl_fac1 / (zrho*zpreclr)), (*yrecldp).rcl_fac2); // ZPRECLR=kg/kg + + zevap_denom = (*yrecldp).rcl_cdenom1*zesatliq - (*yrecldp) + .rcl_cdenom2*ztp1[jk_i]*zesatliq + (*yrecldp) + .rcl_cdenom3*(pow(ztp1[jk_i], (double) 3.))*pap[jl + klon*(jk + + klev*ibl)]; + + // Temperature dependent conductivity + zcorr2 = (pow((ztp1[jk_i] / (double) 273.), (double) 1.5))*(double) + 393. / (ztp1[jk_i] + (double) 120.); + zka = (*yrecldp).rcl_ka273*zcorr2; + + zsubsat = fmax(zzrh*zqsliq - zqe, (double) 0.0); + + zbeta = ((double) 0.5 / zqsliq)*(pow(ztp1[jk_i], (double) 2.)) + *zesatliq*(*yrecldp).rcl_const1r*(zcorr2 / zevap_denom)*((double) 0.78 / + (pow(zlambda, (*yrecldp).rcl_const4r)) + (*yrecldp) + .rcl_const2r*(pow((zrho*zfallcorr), (double) 0.5)) / ((pow(zcorr2, + (double) 0.5))*(pow(zlambda, (*yrecldp).rcl_const3r)))); + + zdenom = (double) 1.0 + zbeta*ptsphy; //*ZCORQSLIQ(JL) + zdpevap = zcovpclr*zbeta*ptsphy*zsubsat / zdenom; + + //--------------------------------------------------------- + // Add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce rain to zero and model + // produces small amounts of rainfall everywhere. + //--------------------------------------------------------- + + // Limit rain evaporation + zevap = fmin(zdpevap, zqxfg[2]); + + zsolqa[4 + 5*(2)] = zsolqa[4 + 5*(2)] + zevap; + zsolqa[2 + 5*(4)] = zsolqa[2 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqxfg[2])); + + // Update fg field + zqxfg[2] = zqxfg[2] - zevap; + + } + + } + // on IEVAPRAIN + + //---------------------------------------------------------------------- + // 4.5 EVAPORATION OF SNOW + //---------------------------------------------------------------------- + // Snow + if (ievapsnow == 1) { + + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice)); + llo1 = zcovpclr > zepsec && zqxfg[3] > zepsec && zqe < zzrh*zqsice; + + if (llo1) { + // note: zpreclr is a rain flux a + zpreclr = zqxfg[3]*zcovpclr / copysign(fmax(fabs(zcovptot*zdtgdp), + zepsilon), zcovptot*zdtgdp); + + //-------------------------------------- + // actual microphysics formula in zbeta + //-------------------------------------- + + zbeta1 = sqrt(pap[jl + klon*(jk + klev*ibl)] / paph[jl + klon*(klev + (klev + + 1)*ibl)]) / (*yrecldp).rvrfactor*zpreclr / fmax(zcovpclr, zepsec); + + zbeta = rg*(*yrecldp).rpecons*(pow(zbeta1, (double) 0.5777)); + + zdenom = (double) 1.0 + zbeta*ptsphy*zcorqsice; + zdpr = zcovpclr*zbeta*(zqsice - zqe) / zdenom*zdp*zrg_r; + zdpevap = zdpr*zdtgdp; + + //--------------------------------------------------------- + // add evaporation term to explicit sink. + // this has to be explicit since if treated in the implicit + // term evaporation can not reduce snow to zero and model + // produces small amounts of snowfall everywhere. + //--------------------------------------------------------- + + // Evaporate snow + zevap = fmin(zdpevap, zqxfg[3]); + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqxfg[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + //--------------------------------------------------------- + } else if (ievapsnow == 2) { + + + + //----------------------------------------------------------------------- + // Calculate relative humidity limit for snow evaporation + //----------------------------------------------------------------------- + zzrh = (*yrecldp).rprecrhmax + ((double) 1.0 - (*yrecldp).rprecrhmax) + *zcovpmax / fmax(zepsec, (double) 1.0 - za[jk_i]); + zzrh = fmin(fmax(zzrh, (*yrecldp).rprecrhmax), (double) 1.0); + zqe = (zqx[4] - za[jk_i]*zqsice) / fmax(zepsec, (double) 1.0 - + za[jk_i]); + + //--------------------------------------------- + // humidity in moistest ZCOVPCLR part of domain + //--------------------------------------------- + zqe = fmax((double) 0.0, fmin(zqe, zqsice)); + llo1 = zcovpclr > zepsec && zqx[3] > zepsec && zqe < zzrh*zqsice; + + if (llo1) { + + // Calculate local precipitation (kg/kg) + zpreclr = zqx[3] / zcovptot; + zvpice = ((double)(r2es*exp((r3ies*(ztp1[jk_i] - rtt))/(ztp1[jk_i] - r4ies))))*rv / rd; + + // Particle size distribution + // ZTCG increases Ni with colder temperatures - essentially a + // Fletcher or Meyers scheme? + ztcg = (double) 1.0; //v1 EXP(RCL_X3I*(273.15_JPRB-ZTP1(JL,JK))/8.18_JPRB) + // ZFACX1I modification is based on Andrew Barrett's results + zfacx1s = (double) 1.0; //v1 (ZICE0/1.E-5_JPRB)**0.627_JPRB + + zaplusb = (*yrecldp).rcl_apb1*zvpice - (*yrecldp).rcl_apb2*zvpice*ztp1[jk_i] + + pap[jl + klon*(jk + klev*ibl)]*(*yrecldp).rcl_apb3*(pow(ztp1[jk_i], 3)); + zcorrfac = pow((1.0 / zrho), 0.5); + zcorrfac2 = + (pow((ztp1[jk_i] / 273.0), 1.5))*(393.0 / (ztp1[jk_i] + 120.0)) + ; + + zpr02 = zrho*zpreclr*(*yrecldp).rcl_const1s / (ztcg*zfacx1s); + + zterm1 = (zqsice - zqe)*(pow(ztp1[jk_i], 2)) + *zvpice*zcorrfac2*ztcg*(*yrecldp).rcl_const2s*zfacx1s / + (zrho*zaplusb*zqsice); + zterm2 = 0.65*(*yrecldp).rcl_const6s*(pow(zpr02, (*yrecldp).rcl_const4s)) + + (*yrecldp).rcl_const3s*(pow(zcorrfac, 0.5))*(pow(zrho, 0.5))*(pow(zpr02, + (*yrecldp).rcl_const5s)) / (pow(zcorrfac2, 0.5)); + + zdpevap = fmax(zcovpclr*zterm1*zterm2*ptsphy, (double) 0.0); + + //-------------------------------------------------------------------- + // Limit evaporation to snow amount + //-------------------------------------------------------------------- + zevap = fmin(zdpevap, zevaplimice); + zevap = fmin(zevap, zqx[3]); + + + zsolqa[4 + 5*(3)] = zsolqa[4 + 5*(3)] + zevap; + zsolqa[3 + 5*(4)] = zsolqa[3 + 5*(4)] - zevap; + + //------------------------------------------------------------- + // Reduce the total precip coverage proportional to evaporation + // to mimic the previous scheme which had a diagnostic + // 2-flux treatment, abandoned due to the new prognostic precip + //------------------------------------------------------------- + zcovptot = fmax((*yrecldp).rcovpmin, zcovptot - fmax((double) 0.0, + (zcovptot - za[jk_i])*zevap / zqx[3])); + + //Update first guess field + zqxfg[3] = zqxfg[3] - zevap; + + } + + } + // on IEVAPSNOW + + //-------------------------------------- + // Evaporate small precipitation amounts + //-------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + if (llfall[jm]) { + if (zqxfg[jm] < (*yrecldp).rlmin) { + zsolqa[4 + 5*jm] = zsolqa[4 + 5*jm] + zqxfg[jm]; + zsolqa[jm + 5*(4)] = zsolqa[jm + 5*(4)] - zqxfg[jm]; + } + } + } + + //###################################################################### + // 5.0 *** SOLVERS FOR A AND L *** + // now use an implicit solution rather than exact solution + // solver is forward in time, upstream difference for advection + //###################################################################### + + //--------------------------- + // 5.1 solver for cloud cover + //--------------------------- + zanew = (za[jk_i] + zsolac) / ((double) 1.0 + zsolab); + zanew = fmin(zanew, (double) 1.0); + if (zanew < (*yrecldp).ramin) { + zanew = (double) 0.0; + } + zda = zanew - zaorig; + //--------------------------------- + // variables needed for next level + //--------------------------------- + zanewm1 = zanew; + + //-------------------------------- + // 5.2 solver for the microphysics + //-------------------------------- + + //-------------------------------------------------------------- + // Truncate explicit sinks to avoid negatives + // Note: Species are treated in the order in which they run out + // since the clipping will alter the balance for the other vars + //-------------------------------------------------------------- + + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + llindex3[jn + 5*jm] = false; + } + zsinksum[jm] = (double) 0.0; + } + + //---------------------------- + // collect sink terms and mark + //---------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + zsinksum[jm] = zsinksum[jm] - zsolqa[jm + 5*jn]; // +ve total is bad + } + } + + //--------------------------------------- + // calculate overshoot and scaling factor + //--------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zmax = fmax(zqx[jm], zepsec); + zrat = fmax(zsinksum[jm], zmax); + zratio[jm] = zmax / zrat; + } + + //-------------------------------------------- + // scale the sink terms, in the correct order, + // recalculating the scale factor each time + //-------------------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zsinksum[jm] = (double) 0.0; + } + + //---------------- + // recalculate sum + //---------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + psum_solqa = 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + psum_solqa = psum_solqa + zsolqa[jm + 5*jn]; + } + // ZSINKSUM(JL,JM)=ZSINKSUM(JL,JM)-SUM(ZSOLQA(JL,JM,1:NCLV)) + zsinksum[jm] = zsinksum[jm] - psum_solqa; + //--------------------------- + // recalculate scaling factor + //--------------------------- + zmm = fmax(zqx[jm], zepsec); + zrr = fmax(zsinksum[jm], zmm); + zratio[jm] = zmm / zrr; + //------ + // scale + //------ + zzratio = zratio[jm]; + //DIR$ IVDEP + //DIR$ PREFERVECTOR + for (jn = 0; jn <= 5 + -1; jn += 1) { + if (zsolqa[jm + 5*jn] < (double) 0.0) { + zsolqa[jm + 5*jn] = zsolqa[jm + 5*jn]*zzratio; + zsolqa[jn + 5*jm] = zsolqa[jn + 5*jm]*zzratio; + } + } + } + + //-------------------------------------------------------------- + // 5.2.2 Solver + //------------------------ + + //------------------------ + // set the LHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + for (jn = 0; jn <= 5 + -1; jn += 1) { + //---------------------------------------------- + // diagonals: microphysical sink terms+transport + //---------------------------------------------- + if (jn + 1 == jm + 1) { + zqlhs[jn + 5*jm] = (double) 1.0 + zfallsink[jm]; + for (jo = 0; jo <= 5 + -1; jo += 1) { + zqlhs[jn + 5*jm] = zqlhs[jn + 5*jm] + zsolqb[jo + 5*jn]; + } + //------------------------------------------ + // non-diagonals: microphysical source terms + //------------------------------------------ + } else { + zqlhs[jn + 5*jm] = -zsolqb[jn + 5*jm]; // here is the delta T - missing from doc. + } + } + } + + //------------------------ + // set the RHS of equation + //------------------------ + for (jm = 0; jm <= 5 + -1; jm += 1) { + //--------------------------------- + // sum the explicit source and sink + //--------------------------------- + zexplicit = (double) 0.0; + for (jn = 0; jn <= 5 + -1; jn += 1) { + zexplicit = zexplicit + zsolqa[jm + 5*jn]; // sum over middle index + } + zqxn[jm] = zqx[jm] + zexplicit; + } + + //----------------------------------- + // *** solve by LU decomposition: *** + //----------------------------------- + + // Note: This fast way of solving NCLVxNCLV system + // assumes a good behaviour (i.e. non-zero diagonal + // terms with comparable orders) of the matrix stored + // in ZQLHS. For the moment this is the case but + // be aware to preserve it when doing eventual + // modifications. + + // Non pivoting recursive factorization + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + // number of steps + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + // row index + zqlhs[jm + 5*jn] = zqlhs[jm + 5*jn] / zqlhs[jn + 5*jn]; + for (ik = jn + 1; ik <= 5 + -1; ik += 1) { + // column index + zqlhs[jm + 5*ik] = zqlhs[jm + 5*ik] - zqlhs[jm + 5*jn]*zqlhs[jn + 5*ik]; + } + } + } + + // Backsubstitution + // step 1 + for (jn = 1; jn <= 5 + -1; jn += 1) { + for (jm = 0; jm <= jn + 1 - 1 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + } + // step 2 + zqxn[4] = zqxn[4] / zqlhs[4 + 5*(4)]; + for (jn = -2 + 5; jn >= 1 + -1; jn += -1) { + for (jm = jn + 1; jm <= 5 + -1; jm += 1) { + zqxn[jn] = zqxn[jn] - zqlhs[jn + 5*jm]*zqxn[jm]; + } + zqxn[jn] = zqxn[jn] / zqlhs[jn + 5*jn]; + } + + // Ensure no small values (including negatives) remain in cloud variables nor + // precipitation rates. + // Evaporate l,i,r,s to water vapour. Latent heating taken into account below + for (jn = 0; jn <= 5 - 1 + -1; jn += 1) { + if (zqxn[jn] < zepsec) { + zqxn[4] = zqxn[4] + zqxn[jn]; + zqxn[jn] = (double) 0.0; + } + } + + //-------------------------------- + // variables needed for next level + //-------------------------------- + for (jm = 0; jm <= 5 + -1; jm += 1) { + zqxnm1[jm] = zqxn[jm]; + zqxn2d[jm] = zqxn[jm]; + } + + //------------------------------------------------------------------------ + // 5.3 Precipitation/sedimentation fluxes to next level + // diagnostic precipitation fluxes + // It is this scaled flux that must be used for source to next layer + //------------------------------------------------------------------------ + + for (jm = 0; jm <= 5 + -1; jm += 1) { + zpfplsx[jk_ip1 + 2*jm] = zfallsink[jm]*zqxn[jm]*zrdtgdp; + } + + // Ensure precipitation fraction is zero if no precipitation + zqpretot = + zpfplsx[jk_ip1 + 2*(3)] + zpfplsx[jk_ip1 + 2*(2)]; + if (zqpretot < zepsec) { + zcovptot = (double) 0.0; + } + + //###################################################################### + // 6 *** UPDATE TENDANCIES *** + //###################################################################### + + //-------------------------------- + // 6.1 Temperature and CLV budgets + //-------------------------------- + + for (jm = 0; jm <= 5 - 1 + -1; jm += 1) { + + // calculate fluxes in and out of box for conservation of TL + zfluxq[jm] = zpsupsatsrce[jm] + zconvsrce[jm] + zfallsrce[jm] - + (zfallsink[jm] + zconvsink[jm])*zqxn[jm]; + + if (iphase[jm] == 1) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + + klon*(jk + klev*ibl)] + ralvdcp*(zqxn[jm] - zqx[jm] - + zfluxq[jm])*zqtmst; + } + + if (iphase[jm] == 2) { + tendency_loc_t[jl + klon*(jk + klev*ibl)] = tendency_loc_t[jl + + klon*(jk + klev*ibl)] + ralsdcp*(zqxn[jm] - zqx[jm] - + zfluxq[jm])*zqtmst; + } + + //---------------------------------------------------------------------- + // New prognostic tendencies - ice,liquid rain,snow + // Note: CLV arrays use PCLV in calculation of tendency while humidity + // uses ZQX. This is due to clipping at start of cloudsc which + // include the tendency already in TENDENCY_LOC_T and TENDENCY_LOC_q. ZQX was reset + //---------------------------------------------------------------------- + tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] = tendency_loc_cld[jl + klon*(jk + klev*(jm + 5*ibl))] + + (zqxn[jm] - zqx0[jm])*zqtmst; + + } + + //---------------------- + // 6.2 Humidity budget + //---------------------- + tendency_loc_q[jl + klon*(jk + klev*ibl)] = tendency_loc_q[jl + + klon*(jk + klev*ibl)] + (zqxn[4] - zqx[4])*zqtmst; + + //------------------- + // 6.3 cloud cover + //----------------------- + tendency_loc_a[jl + klon*(jk + klev*ibl)] = + tendency_loc_a[jl + klon*(jk + klev*ibl)] + zda*zqtmst; + + //-------------------------------------------------- + // Copy precipitation fraction into output variable + //------------------------------------------------- + pcovptot[jl + klon*(jk + klev*ibl)] = zcovptot; + + } + } + + // on vertical level JK + //---------------------------------------------------------------------- + // END OF VERTICAL LOOP + //---------------------------------------------------------------------- + + //###################################################################### + // 8 *** FLUX/DIAGNOSTICS COMPUTATIONS *** + //###################################################################### + + //-------------------------------------------------------------------- + // Copy general precip arrays back into PFP arrays for GRIB archiving + // Add rain and liquid fluxes, ice and snow fluxes + //-------------------------------------------------------------------- + pfplsl[jl + klon*(jk + (klev + 1)*ibl)] = + zpfplsx[jk_i + 2*(2)] + zpfplsx[jk_i + 2*(0)]; + pfplsn[jl + klon*(jk + (klev + 1)*ibl)] = + zpfplsx[jk_i + 2*(3)] + zpfplsx[jk_i + 2*(1)]; + + if (1 <= jk + 1 && jk + 1 <= klev) { + + zgdph_r = -zrg_r*(paph[jl + klon*(1 + jk + (klev + 1)*ibl)] - paph[jl + klon*(jk + + (klev + 1)*ibl)])*zqtmst; + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqlf[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqif[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqlng[jl + klon*(jk + (klev + 1)*ibl)]; + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqnng[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqltur[jl + klon*(jk + (klev + 1)*ibl)]; + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfsqitur[jl + klon*(jk + (klev + 1)*ibl)]; + + zalfaw = zfoealfa; + + // Liquid , LS scheme minus detrainment + pfsqlf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqlf[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[0] - zqx0[0] + pvfl[jl + klon*(jk + klev*ibl) + ]*ptsphy - zalfaw*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // liquid, negative numbers + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqlng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[0]*zgdph_r; + + // liquid, vertical diffusion + pfsqltur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqltur[jl + klon*(1 + jk + + (klev + 1)*ibl)] + pvfl[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // Rain, LS scheme + pfsqrf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqrf[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[2] - zqx0[2])*zgdph_r; + // rain, negative numbers + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqrng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[2]*zgdph_r; + + // Ice , LS scheme minus detrainment + pfsqif[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqif[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[1] - zqx0[1] + pvfi[jl + klon*(jk + klev*ibl) + ]*ptsphy - ((double) 1.0 - zalfaw)*plude[jl + klon*(jk + klev*ibl)])*zgdph_r; + // ice, negative numbers + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqnng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[1]*zgdph_r; + + // ice, vertical diffusion + pfsqitur[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqitur[jl + klon*(1 + jk + + (klev + 1)*ibl)] + pvfi[jl + klon*(jk + klev*ibl)]*ptsphy*zgdph_r; + + // snow, LS scheme + pfsqsf[jl + klon*(1 + jk + (klev + 1)*ibl)] = pfsqsf[jl + klon*(1 + jk + (klev + + 1)*ibl)] + (zqxn2d[3] - zqx0[3])*zgdph_r; + // snow, negative numbers + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] = + pfcqsng[jl + klon*(1 + jk + (klev + 1)*ibl)] + zlneg[3]*zgdph_r; + } + + //----------------------------------- + // enthalpy flux due to precipitation + //----------------------------------- + pfhpsl[jl + klon*(jk + (klev + 1)*ibl)] = + -rlvtt*pfplsl[jl + klon*(jk + (klev + 1)*ibl)]; + pfhpsn[jl + klon*(jk + (klev + 1)*ibl)] = + -rlstt*pfplsn[jl + klon*(jk + (klev + 1)*ibl)]; + } +} + diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp new file mode 100644 index 00000000..8e3f7d91 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp @@ -0,0 +1,619 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_driver.h" +#include "cloudsc_c.kernel" + +#include +#include "mycpu.h" +#include + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +void cloudsc_driver(int numthreads, int numcols, int nproma) { + + cl::sycl::default_selector device_select; + cl::sycl::queue q( device_select ); + + printf("Running on %s\n", q.get_device().get_info().c_str()); + + double *tend_tmp_u; + double *tend_tmp_v; + double *tend_tmp_t; + double *tend_tmp_q; + double *tend_tmp_o3; + double *tend_tmp_a; + double *tend_tmp_cld; + + double *tend_loc_u; + double *tend_loc_v; + double *tend_loc_t; + double *tend_loc_q; + double *tend_loc_o3; + double *tend_loc_a; + double *tend_loc_cld; + + double *tend_cml_u; + double *tend_cml_v; + double *tend_cml_t; + double *tend_cml_q; + double *tend_cml_o3; + double *tend_cml_a; + double *tend_cml_cld; + + double ptsphy; //! Physics timestep + + double *plcrit_aer; + double *picrit_aer; + double *pre_ice; + double *pccn; //! liquid cloud condensation nuclei + double *pnice; //! ice number concentration (cf. CCN) + double *pt; //! T at start of callpar + double *pq; //! Q at start of callpar + double *pvfa; //! CC from VDF scheme + double *pvfl; //! Liq from VDF scheme + double *pvfi; //! Ice from VDF scheme + double *pdyna; //! CC from Dynamics + double *pdynl; //! Liq from Dynamics + double *pdyni; //! Liq from Dynamics + double *phrsw; //! Short-wave heating rate + double *phrlw; //! Long-wave heating rate + double *pvervel; //! Vertical velocity + double *pap; //! Pressure on full levels + double *paph; //! Pressure on half levels + double *plsm; //! Land fraction (0-1) + int *ldcum; + int *ktype; //! Convection type 0,1,2 + double *plu; //! Conv. condensate + double *plude; //! Conv. detrained water + double *plude_tmp; + double *psnde; //! Conv. detrained snow + double *pmfu; //! Conv. mass flux up + double *pmfd; //! Conv. mass flux down + double *pa; //! Original Cloud fraction (t) + + double *pclv; + double *psupsat; + + double *pcovptot; //! Precip fraction + double *prainfrac_toprfz; + double *pfsqlf; //! Flux of liquid + double *pfsqif; //! Flux of ice + double *pfcqlng; //! -ve corr for liq + double *pfcqnng; //! -ve corr for ice + double *pfsqrf; //! Flux diagnostics + double *pfsqsf; //! for DDH, generic + double *pfcqrng; //! rain + double *pfcqsng; //! snow + double *pfsqltur; //! liquid flux due to VDF + double *pfsqitur; //! ice flux due to VDF + double *pfplsl; //! liq+rain sedim flux + double *pfplsn; //! ice+snow sedim flux + double *pfhpsl; //! Enthalpy flux for liq + double *pfhpsn; //! Enthalpy flux for ice + + /* Define or query data dimensions from input file */ + int klon, nlev; + int nblocks = (numcols / nproma) + min(numcols % nproma, 1); + + double zinfo[4][numthreads]; + const double zhpm = 12482329.0; // IBM P7 HPM flop count for 100 points at L137 + + int nclv; // number of microphysics variables + int ncldql; // liquid cloud water + int ncldqi; // ice cloud water + int ncldqr; // rain water + int ncldqs; // snow + int ncldqv; // vapour + + nclv = 5; // number of microphysics variables + ncldql = 1; // liquid cloud water + ncldqi = 2; // ice cloud water + ncldqr = 3; // rain water + ncldqs = 4; // snow + ncldqv = 5; // vapour + + struct TECLDP *yrecldp = (struct TECLDP*)malloc(sizeof(struct TECLDP)); + + query_state(&klon, &nlev); + + tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_cml_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_tmp_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + + plcrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + picrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pre_ice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pccn = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pnice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pt = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pq = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfi = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyna = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdynl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyni = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrsw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrlw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvervel = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pap = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + paph = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + plsm = (double*) malloc( sizeof(double) * nblocks*nproma ); + ldcum = (int*) malloc( sizeof(int) * nblocks*nproma ); + ktype = (int*) malloc( sizeof(int) * nblocks*nproma ); + plu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + psnde = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfd = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pclv = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + psupsat = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + + + double rg; + double rd; + double rcpd; + double retv; + double rlvtt; + double rlstt; + double rlmlt; + double rtt; + double rv; + double r2es; + double r3les; + double r3ies; + double r4les; + double r4ies; + double r5les; + double r5ies; + double r5alvcp; + double r5alscp; + double ralvdcp; + double ralsdcp; + double ralfdcp; + double rtwat; + double rtice; + double rticecu; + double rtwat_rtice_r; + double rtwat_rticecu_r; + double rkoop1; + double rkoop2; + + // device declarations + double *d_plcrit_aer; + double *d_picrit_aer; + double *d_pre_ice; + double *d_pccn; + double *d_pnice; + double *d_pt; + double *d_pq; + double *d_tend_loc_t; + double *d_tend_loc_q; + double *d_tend_loc_a; + double *d_tend_loc_cld; + double *d_tend_tmp_t; + double *d_tend_tmp_q; + double *d_tend_tmp_a; + double *d_tend_tmp_cld; + double *d_pvfa; + double *d_pvfl; + double *d_pvfi; + double *d_pdyna; + double *d_pdynl; + double *d_pdyni; + double *d_phrsw; + double *d_phrlw; + double *d_pvervel; + double *d_pap; + double *d_paph; + double *d_plsm; + int *d_ktype; + double *d_plu; + double *d_plude; + double *d_psnde; + double *d_pmfu; + double *d_pmfd; + double *d_pa; + double *d_pclv; + double *d_psupsat; + struct TECLDP *d_yrecldp; + double *d_pcovptot; + double *d_prainfrac_toprfz; + double *d_pfsqlf; + double *d_pfsqif; + double *d_pfcqnng; + double *d_pfcqlng; + double *d_pfsqrf; + double *d_pfsqsf; + double *d_pfcqrng; + double *d_pfcqsng; + double *d_pfsqltur; + double *d_pfsqitur; + double *d_pfplsl; + double *d_pfplsn; + double *d_pfhpsl; + double *d_pfhpsn; + // end device declarations + + + d_plcrit_aer = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_picrit_aer = cl::sycl::malloc_device( nblocks*nlev*nproma, q); + d_pre_ice = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pccn = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pnice = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pt = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pq = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_t = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_q = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_a = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_cld = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_tend_tmp_t = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_q = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_a = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_cld = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_pvfa = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvfl = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvfi = cl::sycl::malloc_device(nblocks*nlev*nproma,q ); + d_pdyna = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pdynl = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pdyni = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_phrsw = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_phrlw = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvervel = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pap = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_paph = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_plsm = cl::sycl::malloc_device(nblocks*nproma, q); + d_ktype = cl::sycl::malloc_device(nblocks*nproma, q); + d_plu = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_plude = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_psnde = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pmfu = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pmfd = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pa = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pclv = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_psupsat = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_yrecldp = (TECLDP*) cl::sycl::malloc_device( sizeof(TECLDP), q); //cl::sycl::malloc_device(sizeof(struct TECLDP), q); //cl::sycl::malloc_device(1, q); + d_pcovptot = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_prainfrac_toprfz = cl::sycl::malloc_device(nblocks*nproma, q); + d_pfsqlf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqif = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqnng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqlng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqrf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqsf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqrng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqsng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqltur = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqitur = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfplsl = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfplsn = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfhpsl = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfhpsn = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + + load_state(klon, nlev, nclv, numcols, nproma, &ptsphy, plcrit_aer, picrit_aer, + pre_ice, pccn, pnice, pt, pq, + tend_cml_t, tend_cml_q, tend_cml_a, tend_cml_cld, + tend_tmp_t, tend_tmp_q, tend_tmp_a, tend_tmp_cld, + pvfa, pvfl, pvfi, pdyna, pdynl, pdyni, + phrsw, phrlw, pvervel, pap, paph, plsm, ktype, plu, + plude, psnde, pmfu, pmfd, pa, pclv, psupsat, yrecldp, + &rg, &rd, &rcpd, &retv, &rlvtt, &rlstt, + &rlmlt, &rtt, &rv, &r2es, &r3les, &r3ies, + &r4les, &r4ies, &r5les, &r5ies, &r5alvcp, &r5alscp, + &ralvdcp, &ralsdcp, &ralfdcp, &rtwat, + &rtice, &rticecu, &rtwat_rtice_r, &rtwat_rticecu_r, + &rkoop1, &rkoop2 ); + + // host to device + // either + // q.memcpy(bytes) + // q.copy<>(count) + q.memcpy(d_plcrit_aer, plcrit_aer, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_picrit_aer, picrit_aer, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pre_ice, pre_ice, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pccn, pccn, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pnice, pnice, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pt, pt, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pq, pq, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_t, tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_q, tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_a, tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_cld, tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_tend_tmp_t, tend_tmp_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_q, tend_tmp_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_a, tend_tmp_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_cld, tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_pvfa, pvfa, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvfl, pvfl, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvfi, pvfi, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdyna, pdyna, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdynl, pdynl, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdyni, pdyni, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_phrsw, phrsw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_phrlw, phrlw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvervel, pvervel, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pap, pap, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_paph, paph, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(d_plsm, plsm, sizeof(double) * nblocks*nproma); + q.memcpy(d_ktype, ktype, sizeof(int) * nblocks*nproma); + q.memcpy(d_plu, plu, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_plude, plude, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_psnde, psnde, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pmfu, pmfu, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pmfd, pmfd, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pa, pa, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pclv, pclv, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_psupsat, psupsat, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_yrecldp, yrecldp, sizeof(TECLDP)); + q.wait(); + // end host to device + + double t1 = omp_get_wtime(); + + int b, bsize, icalls=0, igpc=numcols; + int coreid = mycpu(); + int tid = omp_get_thread_num(); + double start = omp_get_wtime(); + + int jkglo = 0; + int ibl = (jkglo - 1) / nproma + 1; + int icend = min(nproma, numcols - jkglo + 1); + + cl::sycl::range<1> global(numcols); + cl::sycl::range<1> local(nproma); + + q.submit([&](cl::sycl::handler &h) { + cl::sycl::stream out_stream(16384, 512, h); + h.parallel_for( cl::sycl::nd_range<1>( global, local), [=] (cl::sycl::nd_item<1> item_ct1) { + + cloudsc_c(1, icend, nproma, ptsphy, d_pt, d_pq, + d_tend_tmp_t, d_tend_tmp_q, d_tend_tmp_a, d_tend_tmp_cld, + d_tend_loc_t, d_tend_loc_q, d_tend_loc_a, d_tend_loc_cld, + d_pvfa, d_pvfl, d_pvfi, + d_pdyna, d_pdynl, d_pdyni, + d_phrsw, d_phrlw, d_pvervel, + d_pap, d_paph, d_plsm, d_ktype, + d_plu, d_plude, d_psnde, d_pmfu, d_pmfd, + d_pa, d_pclv, d_psupsat, + d_plcrit_aer, d_picrit_aer, d_pre_ice, d_pccn, d_pnice, + d_pcovptot, d_prainfrac_toprfz, d_pfsqlf, + d_pfsqif, d_pfcqnng, d_pfcqlng, + d_pfsqrf, d_pfsqsf, d_pfcqrng, + d_pfcqsng, d_pfsqltur, d_pfsqitur, + d_pfplsl, d_pfplsn, d_pfhpsl, d_pfhpsn, d_yrecldp, + nblocks, rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, + rv, r2es, r3les, r3ies, r4les, r4ies, r5les, + r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, + rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, + rkoop1, rkoop2, out_stream, item_ct1); + + }); + }); + + q.wait(); + + double end = omp_get_wtime(); + + // device to host + q.memcpy(tend_loc_t, d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_q, d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_a, d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_cld, d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(phrlw, d_phrlw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(plude, d_plude, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(yrecldp, d_yrecldp, sizeof(TECLDP)); + q.memcpy(pcovptot, d_pcovptot, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(prainfrac_toprfz, d_prainfrac_toprfz, sizeof(double) * nblocks*nproma); + q.memcpy(pfsqlf, d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqif, d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqnng, d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqlng, d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqrf, d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqsf, d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqrng, d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqsng, d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqltur, d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqitur, d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfplsl, d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfplsn, d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfhpsl, d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfhpsn, d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma); + q.wait(); + // end device to host + + /* int msec = diff * 1000 / CLOCKS_PER_SEC; */ + zinfo[0][tid] = end - start; + zinfo[1][tid] = (double) coreid; + zinfo[2][tid] = (double) icalls; + zinfo[3][tid] = (double) igpc; + + double t2 = omp_get_wtime(); + + printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; + for (int t = 0; t < numthreads; t++) { + const double tloc = zinfo[0][t]; + const int coreid = (int) zinfo[1][t]; + const int icalls = (int) zinfo[2][t]; + const int igpc = (int) zinfo[3][t]; + zfrac = (double)igpc / (double)numcols; + if (tloc > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; + } else { + zmflops = 0.; + zthrput = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); + } + double tdiff = t2 - t1; + zfrac = 1.0; + if (tdiff > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; + } else { + zmflops = 0.0; + zthrput = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); + + cloudsc_validate(klon, nlev, nclv, numcols, nproma, + plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, + pfcqlng, pfcqnng, pfsqrf, pfsqsf, pfcqrng, pfcqsng, + pfsqltur, pfsqitur, pfplsl, pfplsn, pfhpsl, pfhpsn, + tend_loc_a, tend_loc_q, tend_loc_t, tend_loc_cld); + + free(plcrit_aer); + free(picrit_aer); + free(pre_ice); + free(pccn); + free(pnice); + free(pt); + free(pq); + free(pvfa); + free(pvfl); + free(pvfi); + free(pdyna); + free(pdynl); + free(pdyni); + free(phrsw); + free(phrlw); + free(pvervel); + free(pap); + free(paph); + free(plsm); + free(ktype); + free(plu); + free(plude); + free(psnde); + free(pmfu); + free(pmfd); + free(pa); + free(pclv); + free(psupsat); + free(pcovptot); + free(tend_loc_t); + free(tend_loc_q); + free(tend_loc_a); + free(tend_loc_cld); + free(tend_tmp_t); + free(tend_tmp_q); + free(tend_tmp_a); + free(tend_tmp_cld); + free(tend_cml_t); + free(tend_cml_q); + free(tend_cml_a); + free(tend_cml_cld); + free(prainfrac_toprfz); + free(pfsqlf); + free(pfsqif); + free(pfcqnng); + free(pfcqlng); + free(pfsqrf); + free(pfsqsf); + free(pfcqrng); + free(pfcqsng); + free(pfsqltur); + free(pfsqitur); + free(pfplsl); + free(pfplsn); + free(pfhpsl); + free(pfhpsn); + free(yrecldp); + + // free device + cl::sycl::free(d_plcrit_aer, q); + cl::sycl::free(d_picrit_aer, q); + cl::sycl::free(d_pre_ice, q); + cl::sycl::free(d_pccn, q); + cl::sycl::free(d_pnice, q); + cl::sycl::free(d_pt, q); + cl::sycl::free(d_pq, q); + cl::sycl::free(d_tend_loc_t, q); + cl::sycl::free(d_tend_loc_q, q); + cl::sycl::free(d_tend_loc_a, q); + cl::sycl::free(d_tend_loc_cld, q); + cl::sycl::free(d_tend_tmp_t, q); + cl::sycl::free(d_tend_tmp_q, q); + cl::sycl::free(d_tend_tmp_a, q); + cl::sycl::free(d_tend_tmp_cld, q); + cl::sycl::free(d_pvfa, q); + cl::sycl::free(d_pvfl, q); + cl::sycl::free(d_pvfi, q); + cl::sycl::free(d_pdyna, q); + cl::sycl::free(d_pdynl, q); + cl::sycl::free(d_pdyni, q); + cl::sycl::free(d_phrsw, q); + cl::sycl::free(d_phrlw, q); + cl::sycl::free(d_pvervel, q); + cl::sycl::free(d_pap, q); + cl::sycl::free(d_paph, q); + cl::sycl::free(d_plsm, q); + cl::sycl::free(d_ktype, q); + cl::sycl::free(d_plu, q); + cl::sycl::free(d_plude, q); + cl::sycl::free(d_psnde, q); + cl::sycl::free(d_pmfu, q); + cl::sycl::free(d_pmfd, q); + cl::sycl::free(d_pa, q); + cl::sycl::free(d_pclv, q); + cl::sycl::free(d_psupsat, q); + cl::sycl::free(d_yrecldp, q); + cl::sycl::free(d_pcovptot, q); + cl::sycl::free(d_prainfrac_toprfz, q); + cl::sycl::free(d_pfsqlf, q); + cl::sycl::free(d_pfsqif, q); + cl::sycl::free(d_pfcqnng, q); + cl::sycl::free(d_pfcqlng, q); + cl::sycl::free(d_pfsqrf, q); + cl::sycl::free(d_pfsqsf, q); + cl::sycl::free(d_pfcqrng, q); + cl::sycl::free(d_pfcqsng, q); + cl::sycl::free(d_pfsqltur, q); + cl::sycl::free(d_pfsqitur, q); + cl::sycl::free(d_pfplsl, q); + cl::sycl::free(d_pfplsn, q); + cl::sycl::free(d_pfhpsl, q); + cl::sycl::free(d_pfhpsn, q); + // end free device +} + diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver.h b/src/cloudsc_sycl/cloudsc/cloudsc_driver.h new file mode 100644 index 00000000..9995df15 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver.h @@ -0,0 +1,20 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include +#include + +#include "yoecldp_c.h" +#include "load_state.h" +#include "cloudsc_validate.h" + +void cloudsc_driver(int numthreads, int numcols, int nproma); diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp new file mode 100644 index 00000000..eb00d4f0 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp @@ -0,0 +1,678 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_driver_hoist.h" +#include "cloudsc_c_hoist.kernel" + +#include +#include "mycpu.h" +#include + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +void cloudsc_driver(int numthreads, int numcols, int nproma) { + + cl::sycl::default_selector device_select; + cl::sycl::queue q( device_select ); + + printf("Running on %s\n", q.get_device().get_info().c_str()); + + double *tend_tmp_u; + double *tend_tmp_v; + double *tend_tmp_t; + double *tend_tmp_q; + double *tend_tmp_o3; + double *tend_tmp_a; + double *tend_tmp_cld; + + double *tend_loc_u; + double *tend_loc_v; + double *tend_loc_t; + double *tend_loc_q; + double *tend_loc_o3; + double *tend_loc_a; + double *tend_loc_cld; + + double *tend_cml_u; + double *tend_cml_v; + double *tend_cml_t; + double *tend_cml_q; + double *tend_cml_o3; + double *tend_cml_a; + double *tend_cml_cld; + + double ptsphy; //! Physics timestep + + double *plcrit_aer; + double *picrit_aer; + double *pre_ice; + double *pccn; //! liquid cloud condensation nuclei + double *pnice; //! ice number concentration (cf. CCN) + double *pt; //! T at start of callpar + double *pq; //! Q at start of callpar + double *pvfa; //! CC from VDF scheme + double *pvfl; //! Liq from VDF scheme + double *pvfi; //! Ice from VDF scheme + double *pdyna; //! CC from Dynamics + double *pdynl; //! Liq from Dynamics + double *pdyni; //! Liq from Dynamics + double *phrsw; //! Short-wave heating rate + double *phrlw; //! Long-wave heating rate + double *pvervel; //! Vertical velocity + double *pap; //! Pressure on full levels + double *paph; //! Pressure on half levels + double *plsm; //! Land fraction (0-1) + int *ldcum; + int *ktype; //! Convection type 0,1,2 + double *plu; //! Conv. condensate + double *plude; //! Conv. detrained water + double *plude_tmp; + double *psnde; //! Conv. detrained snow + double *pmfu; //! Conv. mass flux up + double *pmfd; //! Conv. mass flux down + double *pa; //! Original Cloud fraction (t) + + double *pclv; + double *psupsat; + + double *pcovptot; //! Precip fraction + double *prainfrac_toprfz; + double *pfsqlf; //! Flux of liquid + double *pfsqif; //! Flux of ice + double *pfcqlng; //! -ve corr for liq + double *pfcqnng; //! -ve corr for ice + double *pfsqrf; //! Flux diagnostics + double *pfsqsf; //! for DDH, generic + double *pfcqrng; //! rain + double *pfcqsng; //! snow + double *pfsqltur; //! liquid flux due to VDF + double *pfsqitur; //! ice flux due to VDF + double *pfplsl; //! liq+rain sedim flux + double *pfplsn; //! ice+snow sedim flux + double *pfhpsl; //! Enthalpy flux for liq + double *pfhpsn; //! Enthalpy flux for ice + + /* Define or query data dimensions from input file */ + int klon, nlev; + int nblocks = (numcols / nproma) + min(numcols % nproma, 1); + + double zinfo[4][numthreads]; + const double zhpm = 12482329.0; // IBM P7 HPM flop count for 100 points at L137 + + int nclv; // number of microphysics variables + int ncldql; // liquid cloud water + int ncldqi; // ice cloud water + int ncldqr; // rain water + int ncldqs; // snow + int ncldqv; // vapour + + nclv = 5; // number of microphysics variables + ncldql = 1; // liquid cloud water + ncldqi = 2; // ice cloud water + ncldqr = 3; // rain water + ncldqs = 4; // snow + ncldqv = 5; // vapour + + struct TECLDP *yrecldp = (struct TECLDP*)malloc(sizeof(struct TECLDP)); + + query_state(&klon, &nlev); + + tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_cml_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_tmp_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + plcrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + picrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pre_ice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pccn = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pnice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pt = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pq = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfi = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyna = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdynl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyni = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrsw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrlw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvervel = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pap = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + paph = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + plsm = (double*) malloc( sizeof(double) * nblocks*nproma ); + ldcum = (int*) malloc( sizeof(int) * nblocks*nproma ); + ktype = (int*) malloc( sizeof(int) * nblocks*nproma ); + plu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + psnde = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfd = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pclv = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + psupsat = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + + + double rg; + double rd; + double rcpd; + double retv; + double rlvtt; + double rlstt; + double rlmlt; + double rtt; + double rv; + double r2es; + double r3les; + double r3ies; + double r4les; + double r4ies; + double r5les; + double r5ies; + double r5alvcp; + double r5alscp; + double ralvdcp; + double ralsdcp; + double ralfdcp; + double rtwat; + double rtice; + double rticecu; + double rtwat_rtice_r; + double rtwat_rticecu_r; + double rkoop1; + double rkoop2; + + // device declarations + double *d_plcrit_aer; + double *d_picrit_aer; + double *d_pre_ice; + double *d_pccn; + double *d_pnice; + double *d_pt; + double *d_pq; + double *d_tend_loc_t; + double *d_tend_loc_q; + double *d_tend_loc_a; + double *d_tend_loc_cld; + double *d_tend_tmp_t; + double *d_tend_tmp_q; + double *d_tend_tmp_a; + double *d_tend_tmp_cld; + double *d_pvfa; + double *d_pvfl; + double *d_pvfi; + double *d_pdyna; + double *d_pdynl; + double *d_pdyni; + double *d_phrsw; + double *d_phrlw; + double *d_pvervel; + double *d_pap; + double *d_paph; + double *d_plsm; + int *d_ktype; + double *d_plu; + double *d_plude; + double *d_psnde; + double *d_pmfu; + double *d_pmfd; + double *d_pa; + double *d_pclv; + double *d_psupsat; + struct TECLDP *d_yrecldp; + double *d_pcovptot; + double *d_prainfrac_toprfz; + double *d_pfsqlf; + double *d_pfsqif; + double *d_pfcqnng; + double *d_pfcqlng; + double *d_pfsqrf; + double *d_pfsqsf; + double *d_pfcqrng; + double *d_pfcqsng; + double *d_pfsqltur; + double *d_pfsqitur; + double *d_pfplsl; + double *d_pfplsn; + double *d_pfhpsl; + double *d_pfhpsn; + double *d_zfoealfa; + double *d_ztp1; + double *d_zli; + double *d_za; + double *d_zaorig; + double *d_zliqfrac; + double *d_zicefrac; + double *d_zqx; + double *d_zqx0; + double *d_zpfplsx; + double *d_zlneg; + double *d_zqxn2d; + double *d_zqsmix; + double *d_zqsliq; + double *d_zqsice; + double *d_zfoeewmt; + double *d_zfoeew; + double *d_zfoeeliqt; + // end device declarations + + // + d_plcrit_aer = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_picrit_aer = cl::sycl::malloc_device( nblocks*nlev*nproma, q); + d_pre_ice = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pccn = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pnice = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pt = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pq = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_t = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_q = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_a = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_cld = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_tend_tmp_t = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_q = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_a = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_cld = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_pvfa = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvfl = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvfi = cl::sycl::malloc_device(nblocks*nlev*nproma,q ); + d_pdyna = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pdynl = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pdyni = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_phrsw = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_phrlw = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvervel = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pap = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_paph = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_plsm = cl::sycl::malloc_device(nblocks*nproma, q); + d_ktype = cl::sycl::malloc_device(nblocks*nproma, q); + d_plu = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_plude = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_psnde = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pmfu = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pmfd = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pa = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pclv = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_psupsat = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_yrecldp = (TECLDP*) cl::sycl::malloc_device( sizeof(TECLDP), q); + d_pcovptot = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_prainfrac_toprfz = cl::sycl::malloc_device(nblocks*nproma, q); + d_pfsqlf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqif = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqnng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqlng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqrf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqsf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqrng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqsng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqltur = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqitur = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfplsl = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfplsn = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfhpsl = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfhpsn = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zfoealfa = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_ztp1 = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zli = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_za = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zaorig = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zliqfrac = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zicefrac = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zqx = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma*nclv, q); + d_zqx0 = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma*nclv, q); + d_zpfplsx = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma*nclv, q); + d_zlneg = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma*nclv, q); + d_zqxn2d = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma*nclv, q); + d_zqsmix = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zqsliq = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zqsice = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zfoeewmt = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zfoeew = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_zfoeeliqt = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + // + + load_state(klon, nlev, nclv, numcols, nproma, &ptsphy, plcrit_aer, picrit_aer, + pre_ice, pccn, pnice, pt, pq, + tend_cml_t, tend_cml_q, tend_cml_a, tend_cml_cld, + tend_tmp_t, tend_tmp_q, tend_tmp_a, tend_tmp_cld, + pvfa, pvfl, pvfi, pdyna, pdynl, pdyni, + phrsw, phrlw, pvervel, pap, paph, plsm, ktype, plu, + plude, psnde, pmfu, pmfd, pa, pclv, psupsat, yrecldp, + &rg, &rd, &rcpd, &retv, &rlvtt, &rlstt, + &rlmlt, &rtt, &rv, &r2es, &r3les, &r3ies, + &r4les, &r4ies, &r5les, &r5ies, &r5alvcp, &r5alscp, + &ralvdcp, &ralsdcp, &ralfdcp, &rtwat, + &rtice, &rticecu, &rtwat_rtice_r, &rtwat_rticecu_r, + &rkoop1, &rkoop2 ); + + // host to device + q.memcpy(d_plcrit_aer, plcrit_aer, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_picrit_aer, picrit_aer, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pre_ice, pre_ice, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pccn, pccn, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pnice, pnice, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pt, pt, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pq, pq, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_t, tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_q, tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_a, tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_cld, tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_tend_tmp_t, tend_tmp_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_q, tend_tmp_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_a, tend_tmp_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_cld, tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_pvfa, pvfa, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvfl, pvfl, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvfi, pvfi, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdyna, pdyna, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdynl, pdynl, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdyni, pdyni, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_phrsw, phrsw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_phrlw, phrlw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvervel, pvervel, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pap, pap, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_paph, paph, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(d_plsm, plsm, sizeof(double) * nblocks*nproma); + q.memcpy(d_ktype, ktype, sizeof(int) * nblocks*nproma); + q.memcpy(d_plu, plu, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_plude, plude, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_psnde, psnde, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pmfu, pmfu, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pmfd, pmfd, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pa, pa, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pclv, pclv, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_psupsat, psupsat, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_yrecldp, yrecldp, sizeof(TECLDP)); + q.wait(); + // end host to device + + double t1 = omp_get_wtime(); + + int b, bsize, icalls=0, igpc=numcols; + int coreid = mycpu(); + int tid = omp_get_thread_num(); + double start = omp_get_wtime(); + + int jkglo = 0; + int ibl = (jkglo - 1) / nproma + 1; + int icend = min(nproma, numcols - jkglo + 1); + + cl::sycl::range<1> global(numcols); + cl::sycl::range<1> local(nproma); + + q.submit([&](cl::sycl::handler &h) { + cl::sycl::stream out_stream(16384, 512, h); + h.parallel_for( cl::sycl::nd_range<1>( global, local), [=] (cl::sycl::nd_item<1> item_ct1) { + + cloudsc_c(1, icend, nproma, ptsphy, d_pt, d_pq, + d_tend_tmp_t, d_tend_tmp_q, d_tend_tmp_a, d_tend_tmp_cld, + d_tend_loc_t, d_tend_loc_q, d_tend_loc_a, d_tend_loc_cld, + d_pvfa, d_pvfl, d_pvfi, + d_pdyna, d_pdynl, d_pdyni, + d_phrsw, d_phrlw, d_pvervel, + d_pap, d_paph, d_plsm, d_ktype, + d_plu, d_plude, d_psnde, d_pmfu, d_pmfd, + d_pa, d_pclv, d_psupsat, + d_plcrit_aer, d_picrit_aer, d_pre_ice, d_pccn, d_pnice, + d_pcovptot, d_prainfrac_toprfz, d_pfsqlf, + d_pfsqif, d_pfcqnng, d_pfcqlng, + d_pfsqrf, d_pfsqsf, d_pfcqrng, + d_pfcqsng, d_pfsqltur, d_pfsqitur, + d_pfplsl, d_pfplsn, d_pfhpsl, d_pfhpsn, d_yrecldp, + nblocks, rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, + rv, r2es, r3les, r3ies, r4les, r4ies, r5les, + r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, + rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, + rkoop1, rkoop2, + d_zfoealfa, d_ztp1, d_zli, + d_za, d_zaorig, d_zliqfrac, + d_zicefrac, d_zqx, d_zqx0, + d_zpfplsx, d_zlneg, d_zqxn2d, + d_zqsmix, d_zqsliq, d_zqsice, + d_zfoeewmt, d_zfoeew, d_zfoeeliqt, + out_stream, item_ct1); + + + }); + }); + + q.wait(); + + double end = omp_get_wtime(); + + // device to host + q.memcpy(tend_loc_t, d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_q, d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_a, d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_cld, d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(phrlw, d_phrlw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(plude, d_plude, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(yrecldp, d_yrecldp, sizeof(TECLDP)); + q.memcpy(pcovptot, d_pcovptot, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(prainfrac_toprfz, d_prainfrac_toprfz, sizeof(double) * nblocks*nproma); + q.memcpy(pfsqlf, d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqif, d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqnng, d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqlng, d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqrf, d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqsf, d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqrng, d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqsng, d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqltur, d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqitur, d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfplsl, d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfplsn, d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfhpsl, d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfhpsn, d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma); + q.wait(); + + + /* int msec = diff * 1000 / CLOCKS_PER_SEC; */ + zinfo[0][tid] = end - start; + zinfo[1][tid] = (double) coreid; + zinfo[2][tid] = (double) icalls; + zinfo[3][tid] = (double) igpc; + + double t2 = omp_get_wtime(); + + printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; + for (int t = 0; t < numthreads; t++) { + const double tloc = zinfo[0][t]; + const int coreid = (int) zinfo[1][t]; + const int icalls = (int) zinfo[2][t]; + const int igpc = (int) zinfo[3][t]; + zfrac = (double)igpc / (double)numcols; + if (tloc > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; + } else { + zmflops = 0.; + zthrput = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); + } + double tdiff = t2 - t1; + zfrac = 1.0; + if (tdiff > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; + } else { + zmflops = 0.0; + zthrput = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); + + cloudsc_validate(klon, nlev, nclv, numcols, nproma, + plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, + pfcqlng, pfcqnng, pfsqrf, pfsqsf, pfcqrng, pfcqsng, + pfsqltur, pfsqitur, pfplsl, pfplsn, pfhpsl, pfhpsn, + tend_loc_a, tend_loc_q, tend_loc_t, tend_loc_cld); + + free(plcrit_aer); + free(picrit_aer); + free(pre_ice); + free(pccn); + free(pnice); + free(pt); + free(pq); + free(pvfa); + free(pvfl); + free(pvfi); + free(pdyna); + free(pdynl); + free(pdyni); + free(phrsw); + free(phrlw); + free(pvervel); + free(pap); + free(paph); + free(plsm); + free(ktype); + free(plu); + free(plude); + free(psnde); + free(pmfu); + free(pmfd); + free(pa); + free(pclv); + free(psupsat); + free(pcovptot); + free(tend_loc_t); + free(tend_loc_q); + free(tend_loc_a); + free(tend_loc_cld); + free(tend_tmp_t); + free(tend_tmp_q); + free(tend_tmp_a); + free(tend_tmp_cld); + free(tend_cml_t); + free(tend_cml_q); + free(tend_cml_a); + free(tend_cml_cld); + free(prainfrac_toprfz); + free(pfsqlf); + free(pfsqif); + free(pfcqnng); + free(pfcqlng); + free(pfsqrf); + free(pfsqsf); + free(pfcqrng); + free(pfcqsng); + free(pfsqltur); + free(pfsqitur); + free(pfplsl); + free(pfplsn); + free(pfhpsl); + free(pfhpsn); + free(yrecldp); + + // free device + cl::sycl::free(d_plcrit_aer, q); + cl::sycl::free(d_picrit_aer, q); + cl::sycl::free(d_pre_ice, q); + cl::sycl::free(d_pccn, q); + cl::sycl::free(d_pnice, q); + cl::sycl::free(d_pt, q); + cl::sycl::free(d_pq, q); + cl::sycl::free(d_tend_loc_t, q); + cl::sycl::free(d_tend_loc_q, q); + cl::sycl::free(d_tend_loc_a, q); + cl::sycl::free(d_tend_loc_cld, q); + cl::sycl::free(d_tend_tmp_t, q); + cl::sycl::free(d_tend_tmp_q, q); + cl::sycl::free(d_tend_tmp_a, q); + cl::sycl::free(d_tend_tmp_cld, q); + cl::sycl::free(d_pvfa, q); + cl::sycl::free(d_pvfl, q); + cl::sycl::free(d_pvfi, q); + cl::sycl::free(d_pdyna, q); + cl::sycl::free(d_pdynl, q); + cl::sycl::free(d_pdyni, q); + cl::sycl::free(d_phrsw, q); + cl::sycl::free(d_phrlw, q); + cl::sycl::free(d_pvervel, q); + cl::sycl::free(d_pap, q); + cl::sycl::free(d_paph, q); + cl::sycl::free(d_plsm, q); + cl::sycl::free(d_ktype, q); + cl::sycl::free(d_plu, q); + cl::sycl::free(d_plude, q); + cl::sycl::free(d_psnde, q); + cl::sycl::free(d_pmfu, q); + cl::sycl::free(d_pmfd, q); + cl::sycl::free(d_pa, q); + cl::sycl::free(d_pclv, q); + cl::sycl::free(d_psupsat, q); + cl::sycl::free(d_yrecldp, q); + cl::sycl::free(d_pcovptot, q); + cl::sycl::free(d_prainfrac_toprfz, q); + cl::sycl::free(d_pfsqlf, q); + cl::sycl::free(d_pfsqif, q); + cl::sycl::free(d_pfcqnng, q); + cl::sycl::free(d_pfcqlng, q); + cl::sycl::free(d_pfsqrf, q); + cl::sycl::free(d_pfsqsf, q); + cl::sycl::free(d_pfcqrng, q); + cl::sycl::free(d_pfcqsng, q); + cl::sycl::free(d_pfsqltur, q); + cl::sycl::free(d_pfsqitur, q); + cl::sycl::free(d_pfplsl, q); + cl::sycl::free(d_pfplsn, q); + cl::sycl::free(d_pfhpsl, q); + cl::sycl::free(d_pfhpsn, q); + cl::sycl::free(d_zfoealfa,q ); + cl::sycl::free(d_ztp1, q); + cl::sycl::free(d_zli, q); + cl::sycl::free(d_za, q); + cl::sycl::free(d_zaorig, q); + cl::sycl::free(d_zliqfrac, q); + cl::sycl::free(d_zicefrac, q); + cl::sycl::free(d_zqx, q); + cl::sycl::free(d_zqx0, q); + cl::sycl::free(d_zpfplsx, q); + cl::sycl::free(d_zlneg, q); + cl::sycl::free(d_zqxn2d, q); + cl::sycl::free(d_zqsmix, q); + cl::sycl::free(d_zqsliq, q); + cl::sycl::free(d_zqsice, q); + cl::sycl::free(d_zfoeewmt, q); + cl::sycl::free(d_zfoeew, q); + cl::sycl::free(d_zfoeeliqt, q); + // end free device +} + diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.h b/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.h new file mode 100644 index 00000000..9995df15 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.h @@ -0,0 +1,20 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include +#include + +#include "yoecldp_c.h" +#include "load_state.h" +#include "cloudsc_validate.h" + +void cloudsc_driver(int numthreads, int numcols, int nproma); diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp new file mode 100644 index 00000000..c191d721 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp @@ -0,0 +1,617 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_driver.h" +#include "cloudsc_c_k_caching.kernel" + +#include +#include "mycpu.h" +#include + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + +void cloudsc_driver(int numthreads, int numcols, int nproma) { + + cl::sycl::default_selector device_select; + cl::sycl::queue q( device_select ); + + printf("Running on %s\n", q.get_device().get_info().c_str()); + + double *tend_tmp_u; + double *tend_tmp_v; + double *tend_tmp_t; + double *tend_tmp_q; + double *tend_tmp_o3; + double *tend_tmp_a; + double *tend_tmp_cld; + + double *tend_loc_u; + double *tend_loc_v; + double *tend_loc_t; + double *tend_loc_q; + double *tend_loc_o3; + double *tend_loc_a; + double *tend_loc_cld; + + double *tend_cml_u; + double *tend_cml_v; + double *tend_cml_t; + double *tend_cml_q; + double *tend_cml_o3; + double *tend_cml_a; + double *tend_cml_cld; + + double ptsphy; //! Physics timestep + + double *plcrit_aer; + double *picrit_aer; + double *pre_ice; + double *pccn; //! liquid cloud condensation nuclei + double *pnice; //! ice number concentration (cf. CCN) + double *pt; //! T at start of callpar + double *pq; //! Q at start of callpar + double *pvfa; //! CC from VDF scheme + double *pvfl; //! Liq from VDF scheme + double *pvfi; //! Ice from VDF scheme + double *pdyna; //! CC from Dynamics + double *pdynl; //! Liq from Dynamics + double *pdyni; //! Liq from Dynamics + double *phrsw; //! Short-wave heating rate + double *phrlw; //! Long-wave heating rate + double *pvervel; //! Vertical velocity + double *pap; //! Pressure on full levels + double *paph; //! Pressure on half levels + double *plsm; //! Land fraction (0-1) + int *ldcum; + int *ktype; //! Convection type 0,1,2 + double *plu; //! Conv. condensate + double *plude; //! Conv. detrained water + double *plude_tmp; + double *psnde; //! Conv. detrained snow + double *pmfu; //! Conv. mass flux up + double *pmfd; //! Conv. mass flux down + double *pa; //! Original Cloud fraction (t) + + double *pclv; + double *psupsat; + + double *pcovptot; //! Precip fraction + double *prainfrac_toprfz; + double *pfsqlf; //! Flux of liquid + double *pfsqif; //! Flux of ice + double *pfcqlng; //! -ve corr for liq + double *pfcqnng; //! -ve corr for ice + double *pfsqrf; //! Flux diagnostics + double *pfsqsf; //! for DDH, generic + double *pfcqrng; //! rain + double *pfcqsng; //! snow + double *pfsqltur; //! liquid flux due to VDF + double *pfsqitur; //! ice flux due to VDF + double *pfplsl; //! liq+rain sedim flux + double *pfplsn; //! ice+snow sedim flux + double *pfhpsl; //! Enthalpy flux for liq + double *pfhpsn; //! Enthalpy flux for ice + + /* Define or query data dimensions from input file */ + int klon, nlev; + int nblocks = (numcols / nproma) + min(numcols % nproma, 1); + + double zinfo[4][numthreads]; + const double zhpm = 12482329.0; // IBM P7 HPM flop count for 100 points at L137 + + int nclv; // number of microphysics variables + int ncldql; // liquid cloud water + int ncldqi; // ice cloud water + int ncldqr; // rain water + int ncldqs; // snow + int ncldqv; // vapour + + nclv = 5; // number of microphysics variables + ncldql = 1; // liquid cloud water + ncldqi = 2; // ice cloud water + ncldqr = 3; // rain water + ncldqs = 4; // snow + ncldqv = 5; // vapour + + struct TECLDP *yrecldp = (struct TECLDP*)malloc(sizeof(struct TECLDP)); + + query_state(&klon, &nlev); + + tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_cml_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_cml_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + tend_tmp_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + tend_tmp_cld = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + + plcrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + picrit_aer = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pre_ice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pccn = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pnice = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pt = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pq = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvfi = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyna = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdynl = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pdyni = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrsw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + phrlw = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pvervel = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pap = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + paph = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + plsm = (double*) malloc( sizeof(double) * nblocks*nproma ); + ldcum = (int*) malloc( sizeof(int) * nblocks*nproma ); + ktype = (int*) malloc( sizeof(int) * nblocks*nproma ); + plu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + psnde = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfu = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pmfd = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pa = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pclv = (double*) malloc( sizeof(double) * nblocks*nlev*nproma*nclv ); + psupsat = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + + + double rg; + double rd; + double rcpd; + double retv; + double rlvtt; + double rlstt; + double rlmlt; + double rtt; + double rv; + double r2es; + double r3les; + double r3ies; + double r4les; + double r4ies; + double r5les; + double r5ies; + double r5alvcp; + double r5alscp; + double ralvdcp; + double ralsdcp; + double ralfdcp; + double rtwat; + double rtice; + double rticecu; + double rtwat_rtice_r; + double rtwat_rticecu_r; + double rkoop1; + double rkoop2; + + // device declarations + double *d_plcrit_aer; + double *d_picrit_aer; + double *d_pre_ice; + double *d_pccn; + double *d_pnice; + double *d_pt; + double *d_pq; + double *d_tend_loc_t; + double *d_tend_loc_q; + double *d_tend_loc_a; + double *d_tend_loc_cld; + double *d_tend_tmp_t; + double *d_tend_tmp_q; + double *d_tend_tmp_a; + double *d_tend_tmp_cld; + double *d_pvfa; + double *d_pvfl; + double *d_pvfi; + double *d_pdyna; + double *d_pdynl; + double *d_pdyni; + double *d_phrsw; + double *d_phrlw; + double *d_pvervel; + double *d_pap; + double *d_paph; + double *d_plsm; + int *d_ktype; + double *d_plu; + double *d_plude; + double *d_psnde; + double *d_pmfu; + double *d_pmfd; + double *d_pa; + double *d_pclv; + double *d_psupsat; + struct TECLDP *d_yrecldp; + double *d_pcovptot; + double *d_prainfrac_toprfz; + double *d_pfsqlf; + double *d_pfsqif; + double *d_pfcqnng; + double *d_pfcqlng; + double *d_pfsqrf; + double *d_pfsqsf; + double *d_pfcqrng; + double *d_pfcqsng; + double *d_pfsqltur; + double *d_pfsqitur; + double *d_pfplsl; + double *d_pfplsn; + double *d_pfhpsl; + double *d_pfhpsn; + // end device declarations + + + d_plcrit_aer = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_picrit_aer = cl::sycl::malloc_device( nblocks*nlev*nproma, q); + d_pre_ice = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pccn = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pnice = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pt = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pq = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_t = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_q = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_a = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_loc_cld = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_tend_tmp_t = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_q = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_a = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_tend_tmp_cld = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_pvfa = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvfl = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvfi = cl::sycl::malloc_device(nblocks*nlev*nproma,q ); + d_pdyna = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pdynl = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pdyni = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_phrsw = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_phrlw = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pvervel = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pap = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_paph = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_plsm = cl::sycl::malloc_device(nblocks*nproma, q); + d_ktype = cl::sycl::malloc_device(nblocks*nproma, q); + d_plu = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_plude = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_psnde = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pmfu = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pmfd = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pa = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_pclv = cl::sycl::malloc_device(nblocks*nlev*nproma*nclv, q); + d_psupsat = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_yrecldp = (TECLDP*) cl::sycl::malloc_device( sizeof(TECLDP), q); + d_pcovptot = cl::sycl::malloc_device(nblocks*nlev*nproma, q); + d_prainfrac_toprfz = cl::sycl::malloc_device(nblocks*nproma, q); + d_pfsqlf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqif = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqnng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqlng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqrf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqsf = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqrng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfcqsng = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqltur = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfsqitur = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfplsl = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfplsn = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfhpsl = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + d_pfhpsn = cl::sycl::malloc_device(nblocks*(nlev+1)*nproma, q); + + load_state(klon, nlev, nclv, numcols, nproma, &ptsphy, plcrit_aer, picrit_aer, + pre_ice, pccn, pnice, pt, pq, + tend_cml_t, tend_cml_q, tend_cml_a, tend_cml_cld, + tend_tmp_t, tend_tmp_q, tend_tmp_a, tend_tmp_cld, + pvfa, pvfl, pvfi, pdyna, pdynl, pdyni, + phrsw, phrlw, pvervel, pap, paph, plsm, ktype, plu, + plude, psnde, pmfu, pmfd, pa, pclv, psupsat, yrecldp, + &rg, &rd, &rcpd, &retv, &rlvtt, &rlstt, + &rlmlt, &rtt, &rv, &r2es, &r3les, &r3ies, + &r4les, &r4ies, &r5les, &r5ies, &r5alvcp, &r5alscp, + &ralvdcp, &ralsdcp, &ralfdcp, &rtwat, + &rtice, &rticecu, &rtwat_rtice_r, &rtwat_rticecu_r, + &rkoop1, &rkoop2 ); + + // host to device + q.memcpy(d_plcrit_aer, plcrit_aer, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_picrit_aer, picrit_aer, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pre_ice, pre_ice, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pccn, pccn, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pnice, pnice, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pt, pt, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pq, pq, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_t, tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_q, tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_a, tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_loc_cld, tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_tend_tmp_t, tend_tmp_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_q, tend_tmp_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_a, tend_tmp_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_tend_tmp_cld, tend_tmp_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_pvfa, pvfa, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvfl, pvfl, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvfi, pvfi, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdyna, pdyna, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdynl, pdynl, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pdyni, pdyni, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_phrsw, phrsw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_phrlw, phrlw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pvervel, pvervel, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pap, pap, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_paph, paph, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(d_plsm, plsm, sizeof(double) * nblocks*nproma); + q.memcpy(d_ktype, ktype, sizeof(int) * nblocks*nproma); + q.memcpy(d_plu, plu, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_plude, plude, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_psnde, psnde, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pmfu, pmfu, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pmfd, pmfd, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pa, pa, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_pclv, pclv, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(d_psupsat, psupsat, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(d_yrecldp, yrecldp, sizeof(TECLDP)); + q.wait(); + // end host to device + + double t1 = omp_get_wtime(); + + int b, bsize, icalls=0, igpc=numcols; + int coreid = mycpu(); + int tid = omp_get_thread_num(); + double start = omp_get_wtime(); + + int jkglo = 0; + int ibl = (jkglo - 1) / nproma + 1; + int icend = min(nproma, numcols - jkglo + 1); + + + cl::sycl::range<1> global(numcols); + cl::sycl::range<1> local(nproma); + + q.submit([&](cl::sycl::handler &h) { + cl::sycl::stream out_stream(16384, 512, h); + h.parallel_for( cl::sycl::nd_range<1>( global, local), [=] (cl::sycl::nd_item<1> item_ct1) { + + cloudsc_c(1, icend, nproma, ptsphy, d_pt, d_pq, + d_tend_tmp_t, d_tend_tmp_q, d_tend_tmp_a, d_tend_tmp_cld, + d_tend_loc_t, d_tend_loc_q, d_tend_loc_a, d_tend_loc_cld, + d_pvfa, d_pvfl, d_pvfi, + d_pdyna, d_pdynl, d_pdyni, + d_phrsw, d_phrlw, d_pvervel, + d_pap, d_paph, d_plsm, d_ktype, + d_plu, d_plude, d_psnde, d_pmfu, d_pmfd, + d_pa, d_pclv, d_psupsat, + d_plcrit_aer, d_picrit_aer, d_pre_ice, d_pccn, d_pnice, + d_pcovptot, d_prainfrac_toprfz, d_pfsqlf, + d_pfsqif, d_pfcqnng, d_pfcqlng, + d_pfsqrf, d_pfsqsf, d_pfcqrng, + d_pfcqsng, d_pfsqltur, d_pfsqitur, + d_pfplsl, d_pfplsn, d_pfhpsl, d_pfhpsn, d_yrecldp, + nblocks, rg, rd, rcpd, retv, rlvtt, rlstt, rlmlt, rtt, + rv, r2es, r3les, r3ies, r4les, r4ies, r5les, + r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, + rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, + rkoop1, rkoop2, out_stream, item_ct1); + + }); + }); + + q.wait(); + + double end = omp_get_wtime(); + + // device to host + q.memcpy(tend_loc_t, d_tend_loc_t, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_q, d_tend_loc_q, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_a, d_tend_loc_a, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(tend_loc_cld, d_tend_loc_cld, sizeof(double) * nblocks*nlev*nproma*nclv); + q.memcpy(phrlw, d_phrlw, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(plude, d_plude, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(yrecldp, d_yrecldp, sizeof(TECLDP)); + q.memcpy(pcovptot, d_pcovptot, sizeof(double) * nblocks*nlev*nproma); + q.memcpy(prainfrac_toprfz, d_prainfrac_toprfz, sizeof(double) * nblocks*nproma); + q.memcpy(pfsqlf, d_pfsqlf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqif, d_pfsqif, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqnng, d_pfcqnng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqlng, d_pfcqlng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqrf, d_pfsqrf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqsf, d_pfsqsf, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqrng, d_pfcqrng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfcqsng, d_pfcqsng, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqltur, d_pfsqltur, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfsqitur, d_pfsqitur, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfplsl, d_pfplsl, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfplsn, d_pfplsn, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfhpsl, d_pfhpsl, sizeof(double) * nblocks*(nlev+1)*nproma); + q.memcpy(pfhpsn, d_pfhpsn, sizeof(double) * nblocks*(nlev+1)*nproma); + q.wait(); + // end device to host + + /* int msec = diff * 1000 / CLOCKS_PER_SEC; */ + zinfo[0][tid] = end - start; + zinfo[1][tid] = (double) coreid; + zinfo[2][tid] = (double) icalls; + zinfo[3][tid] = (double) igpc; + + double t2 = omp_get_wtime(); + + printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; + for (int t = 0; t < numthreads; t++) { + const double tloc = zinfo[0][t]; + const int coreid = (int) zinfo[1][t]; + const int icalls = (int) zinfo[2][t]; + const int igpc = (int) zinfo[3][t]; + zfrac = (double)igpc / (double)numcols; + if (tloc > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; + } else { + zmflops = 0.; + zthrput = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); + } + double tdiff = t2 - t1; + zfrac = 1.0; + if (tdiff > 0.0) { + zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; + } else { + zmflops = 0.0; + zthrput = 0.0; + } + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); + + cloudsc_validate(klon, nlev, nclv, numcols, nproma, + plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, + pfcqlng, pfcqnng, pfsqrf, pfsqsf, pfcqrng, pfcqsng, + pfsqltur, pfsqitur, pfplsl, pfplsn, pfhpsl, pfhpsn, + tend_loc_a, tend_loc_q, tend_loc_t, tend_loc_cld); + + free(plcrit_aer); + free(picrit_aer); + free(pre_ice); + free(pccn); + free(pnice); + free(pt); + free(pq); + free(pvfa); + free(pvfl); + free(pvfi); + free(pdyna); + free(pdynl); + free(pdyni); + free(phrsw); + free(phrlw); + free(pvervel); + free(pap); + free(paph); + free(plsm); + free(ktype); + free(plu); + free(plude); + free(psnde); + free(pmfu); + free(pmfd); + free(pa); + free(pclv); + free(psupsat); + free(pcovptot); + free(tend_loc_t); + free(tend_loc_q); + free(tend_loc_a); + free(tend_loc_cld); + free(tend_tmp_t); + free(tend_tmp_q); + free(tend_tmp_a); + free(tend_tmp_cld); + free(tend_cml_t); + free(tend_cml_q); + free(tend_cml_a); + free(tend_cml_cld); + free(prainfrac_toprfz); + free(pfsqlf); + free(pfsqif); + free(pfcqnng); + free(pfcqlng); + free(pfsqrf); + free(pfsqsf); + free(pfcqrng); + free(pfcqsng); + free(pfsqltur); + free(pfsqitur); + free(pfplsl); + free(pfplsn); + free(pfhpsl); + free(pfhpsn); + free(yrecldp); + + // free device + cl::sycl::free(d_plcrit_aer, q); + cl::sycl::free(d_picrit_aer, q); + cl::sycl::free(d_pre_ice, q); + cl::sycl::free(d_pccn, q); + cl::sycl::free(d_pnice, q); + cl::sycl::free(d_pt, q); + cl::sycl::free(d_pq, q); + cl::sycl::free(d_tend_loc_t, q); + cl::sycl::free(d_tend_loc_q, q); + cl::sycl::free(d_tend_loc_a, q); + cl::sycl::free(d_tend_loc_cld, q); + cl::sycl::free(d_tend_tmp_t, q); + cl::sycl::free(d_tend_tmp_q, q); + cl::sycl::free(d_tend_tmp_a, q); + cl::sycl::free(d_tend_tmp_cld, q); + cl::sycl::free(d_pvfa, q); + cl::sycl::free(d_pvfl, q); + cl::sycl::free(d_pvfi, q); + cl::sycl::free(d_pdyna, q); + cl::sycl::free(d_pdynl, q); + cl::sycl::free(d_pdyni, q); + cl::sycl::free(d_phrsw, q); + cl::sycl::free(d_phrlw, q); + cl::sycl::free(d_pvervel, q); + cl::sycl::free(d_pap, q); + cl::sycl::free(d_paph, q); + cl::sycl::free(d_plsm, q); + cl::sycl::free(d_ktype, q); + cl::sycl::free(d_plu, q); + cl::sycl::free(d_plude, q); + cl::sycl::free(d_psnde, q); + cl::sycl::free(d_pmfu, q); + cl::sycl::free(d_pmfd, q); + cl::sycl::free(d_pa, q); + cl::sycl::free(d_pclv, q); + cl::sycl::free(d_psupsat, q); + cl::sycl::free(d_yrecldp, q); + cl::sycl::free(d_pcovptot, q); + cl::sycl::free(d_prainfrac_toprfz, q); + cl::sycl::free(d_pfsqlf, q); + cl::sycl::free(d_pfsqif, q); + cl::sycl::free(d_pfcqnng, q); + cl::sycl::free(d_pfcqlng, q); + cl::sycl::free(d_pfsqrf, q); + cl::sycl::free(d_pfsqsf, q); + cl::sycl::free(d_pfcqrng, q); + cl::sycl::free(d_pfcqsng, q); + cl::sycl::free(d_pfsqltur, q); + cl::sycl::free(d_pfsqitur, q); + cl::sycl::free(d_pfplsl, q); + cl::sycl::free(d_pfplsn, q); + cl::sycl::free(d_pfhpsl, q); + cl::sycl::free(d_pfhpsn, q); + // end free device +} + diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp new file mode 100644 index 00000000..710614f9 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp @@ -0,0 +1,245 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "cloudsc_validate.h" + +#include +#include +#include +//#include + + +#define min(a,b) (((a)<(b))?(a):(b)) + +void print_error(const char *name, double zminval, double zmaxval, double zmaxerr, + double zerrsum, double zsum, double zavgpgp, int ndim) +{ + double zrelerr, zeps = std::numeric_limits::epsilon(); + int iopt = 0; + if (zerrsum < zeps) { + zrelerr = 0.0; + iopt = 1; + } else if (zsum < zeps) { + zrelerr = zerrsum / (1.0 + zsum); + iopt = 2; + } else { + zrelerr = zerrsum / zsum; + iopt = 3; + } + + //-- If you get 4 exclamation marks next to your error output, + // then it is likely that some uninitialized variables exists or + // some other screw-up -- watch out this !!!! + //char *clwarn; + const char* clwarn = (zrelerr > 10.0 * zeps) ? " !!!!" : " "; + zrelerr = 100.0 * zrelerr; + + printf(" %+20s %dD%d %20.13le %20.13le %20.13le %20.13le %20.13le %s\n", + name, ndim, iopt, zminval, zmaxval, zmaxerr, zavgpgp, zrelerr, clwarn); +} + + +void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, int ngptot, int nblocks) +{ + /* Computes and prints errors in the "L2 norm sense" */ + int b, bsize, jk; + double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; + //double (*field)[nlon] = (double (*)[nlon]) v_field; + //double (*reference)[nlon] = (double (*)[nlon]) v_ref; + + zminval = +std::numeric_limits::max(); + zmaxval = -std::numeric_limits::max(); + zmaxerr = 0.0; + zerrsum = 0.0; + zsum = 0.0; + +// #pragma omp parallel for default(shared) private(b, bsize, jk) \ +// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + for (b = 0; b < nblocks; b++) { + bsize = min(nlon, ngptot - b*nlon); // field block size + for (jk = 0; jk < bsize; jk++) { + zminval = fmin(zminval, v_field[b*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nlon+jk]); + + // Difference against reference result in one-norm sense + zdiff = fabs(v_field[b*nlon+jk] - v_ref[b*nlon+jk]); + zmaxerr = fmax(zmaxerr, zdiff); + zerrsum = zerrsum + zdiff; + zsum = zsum + abs(v_ref[b*nlon+jk]); + } + } + zavgpgp = zerrsum / (double) ngptot; + print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); +} + + +void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int nlev, int ngptot, int nblocks) +{ + /* Computes and prints errors in the "L2 norm sense" */ + int b, bsize, jl, jk; + double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; +// double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field; +// double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref; + + zminval = +std::numeric_limits::max(); + zmaxval = -std::numeric_limits::max(); + zmaxerr = 0.0; + zerrsum = 0.0; + zsum = 0.0; + +// #pragma omp parallel for default(shared) private(b, bsize, jl, jk) \ +// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + + for (b = 0; b < nblocks; b++) { + bsize = min(nlon, ngptot - b*nlon); // field block size + for (jl = 0; jl < nlev; jl++) { + for (jk = 0; jk < bsize; jk++) { + zminval = fmin(zminval, v_field[b*nlev*nlon+jl*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nlev*nlon+jl*nlon+jk]); + + // Difference against reference result in one-norm sense + zdiff = fabs(v_field[b*nlev*nlon+jl*nlon+jk] - v_ref[b*nlev*nlon+jl*nlon+jk]); + zmaxerr = fmax(zmaxerr, zdiff); + zerrsum = zerrsum + zdiff; + zsum = zsum + abs(v_ref[b*nlev*nlon+jl*nlon+jk]); + } + } + } + + zavgpgp = zerrsum / (double) ngptot; + print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); +} + + +void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, + int nlev, int nclv, int ngptot, int nblocks) +{ + /* Computes and prints errors in the "L2 norm sense" */ + int b, bsize, jl, jk, jm; + double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; +// double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field; +// double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref; + + zminval = +std::numeric_limits::max(); + zmaxval = -std::numeric_limits::max(); + zmaxerr = 0.0; + zerrsum = 0.0; + zsum = 0.0; + +// #pragma omp parallel for default(shared) private(b, bsize, jl, jk, jm) \ +// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + for (b = 0; b < nblocks; b++) { + bsize = min(nlon, ngptot - b*nlon); // field block size + for (jm = 0; jm < nclv; jm++) { + for (jl = 0; jl < nlev; jl++) { + for (jk = 0; jk < bsize; jk++) { + zminval = fmin(zminval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + + // Difference against reference result in one-norm sense + zdiff = fabs(v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk] - v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + zmaxerr = fmax(zmaxerr, zdiff); + zerrsum = zerrsum + zdiff; + zsum = zsum + abs(v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + } + } + } + } + zavgpgp = zerrsum / (double) ngptot; + print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); +} + + +int cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) +{ + const int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + double *ref_plude = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_pcovptot = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_prainfrac_toprfz = (double*) malloc( sizeof(double) * nblocks*nproma ); + double *ref_pfsqlf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqif = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqlng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqnng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqrf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqsf = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqrng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfcqsng = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqltur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfsqitur = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfplsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfplsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfhpsl = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_pfhpsn = (double*) malloc( sizeof(double) * nblocks*(nlev+1)*nproma ); + double *ref_tend_loc_a = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_tend_loc_q = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_tend_loc_t = (double*) malloc( sizeof(double) * nblocks*nlev*nproma ); + double *ref_tend_loc_cld = (double*) malloc( sizeof(double) * nblocks*nclv*nlev*nproma ); + + load_reference(nlon, nlev, nclv, ngptot, nproma, + ref_plude, ref_pcovptot, ref_prainfrac_toprfz, ref_pfsqlf, ref_pfsqif, + ref_pfcqlng, ref_pfcqnng, ref_pfsqrf, ref_pfsqsf, ref_pfcqrng, ref_pfcqsng, + ref_pfsqltur, ref_pfsqitur, ref_pfplsl, ref_pfplsn, ref_pfhpsl, ref_pfhpsn, + ref_tend_loc_a, ref_tend_loc_q, ref_tend_loc_t, ref_tend_loc_cld); + + + printf(" %+20s %s %+20s %+20s %+20s %+20s %+20s\n", + "Variable", "Dim", "MinValue", "MaxValue", "AbsMaxErr", "AvgAbsErr/GP", "MaxRelErr-%"); + + validate_2d("PLUDE", ref_plude, plude, nproma, nlev, ngptot, nblocks); + validate_2d("PCOVPTOT", ref_pcovptot, pcovptot, nproma, nlev, ngptot, nblocks); + validate_1d("PRAINFRAC_TOPRFZ", ref_prainfrac_toprfz, prainfrac_toprfz, nproma, ngptot, nblocks); + validate_2d("PFSQLF", ref_pfsqlf, pfsqlf, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQIF", ref_pfsqif, pfsqif, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQLNG", ref_pfcqlng, pfcqlng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQNNG", ref_pfcqnng, pfcqnng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQRF", ref_pfsqrf, pfsqrf, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQSF", ref_pfsqsf, pfsqsf, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQRNG", ref_pfcqrng, pfcqrng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFCQSNG", ref_pfcqsng, pfcqsng, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQLTUR", ref_pfsqltur, pfsqltur, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFSQITUR", ref_pfsqitur, pfsqitur, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFPLSL", ref_pfplsl, pfplsl, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFPLSN", ref_pfplsn, pfplsn, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFHPSL", ref_pfhpsl, pfhpsl, nproma, nlev+1, ngptot, nblocks); + validate_2d("PFHPSN", ref_pfhpsn, pfhpsn, nproma, nlev+1, ngptot, nblocks); + validate_2d("TENDENCY_LOC%A", ref_tend_loc_a, tend_loc_a, nproma, nlev, ngptot, nblocks); + validate_2d("TENDENCY_LOC%Q", ref_tend_loc_q, tend_loc_q, nproma, nlev, ngptot, nblocks); + validate_2d("TENDENCY_LOC%T", ref_tend_loc_t, tend_loc_t, nproma, nlev, ngptot, nblocks); + validate_3d("TENDENCY_LOC%CLD", ref_tend_loc_cld, tend_loc_cld, nproma, nlev, nclv, ngptot, nblocks); + + free(ref_plude); + free(ref_pcovptot); + free(ref_prainfrac_toprfz); + free(ref_pfsqlf); + free(ref_pfsqif); + free(ref_pfcqlng); + free(ref_pfcqnng); + free(ref_pfsqrf); + free(ref_pfsqsf); + free(ref_pfcqrng); + free(ref_pfcqsng); + free(ref_pfsqltur); + free(ref_pfsqitur); + free(ref_pfplsl); + free(ref_pfplsn); + free(ref_pfhpsl); + free(ref_pfhpsn); + free(ref_tend_loc_a); + free(ref_tend_loc_q); + free(ref_tend_loc_t); + free(ref_tend_loc_cld); + + return 0; + +} diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_validate.h b/src/cloudsc_sycl/cloudsc/cloudsc_validate.h new file mode 100644 index 00000000..7202e09c --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/cloudsc_validate.h @@ -0,0 +1,18 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "load_state.h" +//#include + +int cloudsc_validate(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld); diff --git a/src/cloudsc_sycl/cloudsc/load_state.cpp b/src/cloudsc_sycl/cloudsc/load_state.cpp new file mode 100644 index 00000000..610cf8f4 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/load_state.cpp @@ -0,0 +1,457 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "load_state.h" + +#include +#include "serialbox-c/Serialbox.h" + +#define min(a, b) (((a) < (b)) ? (a) : (b)) +#define max(a, b) (((a) > (b)) ? (a) : (b)) + + +/* Query sizes and dimensions of state arrays */ +void query_state(int *klon, int *klev) +{ + serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); + serialboxMetainfo_t* globalMetainfo = serialboxSerializerGetGlobalMetainfo(serializer); + + *klon = serialboxMetainfoGetInt32(globalMetainfo, "KLON"); + *klev = serialboxMetainfoGetInt32(globalMetainfo, "KLEV"); + + serialboxMetainfoDestroy(globalMetainfo); + serialboxSerializerDestroy(serializer); +} + +void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) +{ + int gidx, bsize, bidx, bend, fsize, b, l, i; + + #pragma omp parallel for default(shared) private(gidx, bsize, bidx, bend, fsize, b, l, i) + for (b = 0; b < nblocks; b++) { + gidx = b*nproma; // Global starting index of the block in the general domain + bsize = min(nproma, ngptot - gidx); // Size of the field block + bidx = gidx % nlon; // Rolling index into the input buffer + bend = min(bidx+bsize, nlon); // Idealised final index in the input buffer + + if (bend-bidx < bsize) { + // The input buffer does not hold enough data to fill field block; + // we need to fill the rest of the block with data from front of buffer. + fsize = nlon - bidx; + for (i = 0; i < fsize; i++) { field_in[b*nproma+i] = buffer[bidx + i]; } + for (i = 0; i < bsize-fsize; i++) { field_in[b*nproma+fsize+i] = buffer[i]; } + } else { + // Simply copy a block of data from the rolling buffer index + for (i = 0; i < bsize; i++) { field_in[b*nproma+i] = buffer[bidx+i]; } + } + // Zero out the remainder of last block + for (i=bsize; iramid = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RAMID"); + yrecldp->rcldiff = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDIFF"); + yrecldp->rcldiff_convi = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDIFF_CONVI"); + yrecldp->rclcrit = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLCRIT"); + yrecldp->rclcrit_sea = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLCRIT_SEA"); + yrecldp->rclcrit_land = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLCRIT_LAND"); + yrecldp->rkconv = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RKCONV"); + yrecldp->rprc1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPRC1"); + yrecldp->rprc2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPRC2"); + yrecldp->rcldmax = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDMAX"); + yrecldp->rpecons = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPECONS"); + yrecldp->rvrfactor = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVRFACTOR"); + yrecldp->rprecrhmax = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RPRECRHMAX"); + yrecldp->rtaumel = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RTAUMEL"); + yrecldp->ramin = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RAMIN"); + yrecldp->rlmin = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RLMIN"); + yrecldp->rkooptau = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RKOOPTAU"); + + yrecldp->rcldtopp = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDTOPP"); + yrecldp->rlcritsnow = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RLCRITSNOW"); + yrecldp->rsnowlin1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RSNOWLIN1"); + yrecldp->rsnowlin2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RSNOWLIN2"); + yrecldp->ricehi1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RICEHI1"); + yrecldp->ricehi2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RICEHI2"); + yrecldp->riceinit = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RICEINIT"); + yrecldp->rvice = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVICE"); + yrecldp->rvrain = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVRAIN"); + yrecldp->rvsnow = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RVSNOW"); + yrecldp->rthomo = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RTHOMO"); + yrecldp->rcovpmin = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCOVPMIN"); + yrecldp->rccn = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCN"); + yrecldp->rnice = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RNICE"); + yrecldp->rccnom = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCNOM"); + yrecldp->rccnss = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCNSS"); + yrecldp->rccnsu = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCCNSU"); + yrecldp->rcldtopcf = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCLDTOPCF"); + yrecldp->rdepliqrefrate = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDEPLIQREFRATE"); + yrecldp->rdepliqrefdepth = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDEPLIQREFDEPTH"); + yrecldp->rcl_kkaac = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKAac"); + yrecldp->rcl_kkbac = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKBac"); + yrecldp->rcl_kkaau = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKAau"); + yrecldp->rcl_kkbauq = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKBauq"); + yrecldp->rcl_kkbaun = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KKBaun"); + yrecldp->rcl_kk_cloud_num_sea = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KK_cloud_num_sea"); + yrecldp->rcl_kk_cloud_num_land = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KK_cloud_num_land"); + yrecldp->rcl_ai = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_AI"); + yrecldp->rcl_bi = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_BI"); + yrecldp->rcl_ci = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CI"); + yrecldp->rcl_di = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DI"); + yrecldp->rcl_x1i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X1I"); + yrecldp->rcl_x2i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X2I"); + yrecldp->rcl_x3i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X3I"); + yrecldp->rcl_x4i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X4I"); + yrecldp->rcl_const1i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST1I"); + yrecldp->rcl_const2i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST2I"); + yrecldp->rcl_const3i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST3I"); + yrecldp->rcl_const4i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST4I"); + yrecldp->rcl_const5i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST5I"); + yrecldp->rcl_const6i = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST6I"); + yrecldp->rcl_apb1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_APB1"); + yrecldp->rcl_apb2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_APB2"); + yrecldp->rcl_apb3 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_APB3"); + yrecldp->rcl_as = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_AS"); + yrecldp->rcl_bs = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_BS"); + yrecldp->rcl_cs = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CS"); + yrecldp->rcl_ds = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DS"); + yrecldp->rcl_x1s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X1S"); + yrecldp->rcl_x2s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X2S"); + yrecldp->rcl_x3s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X3S"); + yrecldp->rcl_x4s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X4S"); + yrecldp->rcl_const1s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST1S"); + yrecldp->rcl_const2s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST2S"); + yrecldp->rcl_const3s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST3S"); + yrecldp->rcl_const4s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST4S"); + yrecldp->rcl_const5s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST5S"); + yrecldp->rcl_const6s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST6S"); + yrecldp->rcl_const7s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST7S"); + yrecldp->rcl_const8s = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST8S"); + yrecldp->rdenswat = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDENSWAT"); + yrecldp->rdensref = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RDENSREF"); + yrecldp->rcl_ar = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_AR"); + yrecldp->rcl_br = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_BR"); + yrecldp->rcl_cr = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CR"); + yrecldp->rcl_dr = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DR"); + yrecldp->rcl_x1r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X1R"); + yrecldp->rcl_x2r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X2R"); + yrecldp->rcl_x4r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_X4R"); + yrecldp->rcl_ka273 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_KA273"); + yrecldp->rcl_cdenom1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CDENOM1"); + yrecldp->rcl_cdenom2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CDENOM2"); + yrecldp->rcl_cdenom3 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CDENOM3"); + yrecldp->rcl_schmidt = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_SCHMIDT"); + yrecldp->rcl_dynvisc = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_DYNVISC"); + yrecldp->rcl_const1r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST1R"); + yrecldp->rcl_const2r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST2R"); + yrecldp->rcl_const3r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST3R"); + yrecldp->rcl_const4r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST4R"); + yrecldp->rcl_fac1 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FAC1"); + yrecldp->rcl_fac2 = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FAC2"); + yrecldp->rcl_const5r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST5R"); + yrecldp->rcl_const6r = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_CONST6R"); + yrecldp->rcl_fzrab = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FZRAB"); + yrecldp->rcl_fzrbb = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_RCL_FZRBB"); + yrecldp->lcldextra = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LCLDEXTRA"); + yrecldp->lcldbudget = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LCLDBUDGET"); + yrecldp->nssopt = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NSSOPT"); + yrecldp->ncldtop = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NCLDTOP"); + yrecldp->naeclbc = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLBC"); + yrecldp->naecldu = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLDU"); + yrecldp->naeclom = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLOM"); + yrecldp->naeclss = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLSS"); + yrecldp->naeclsu = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAECLSU"); + yrecldp->nclddiag = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NCLDDIAG"); + yrecldp->naercld = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NAERCLD"); + yrecldp->laerliqautolsp = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQAUTOLSP"); + yrecldp->laerliqautocp = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQAUTOCP"); + yrecldp->laerliqautocpb = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQAUTOCPB"); + yrecldp->laerliqcoll = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERLIQCOLL"); + yrecldp->laericesed = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERICESED"); + yrecldp->laericeauto = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_LAERICEAUTO"); + yrecldp->nshapep = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NSHAPEP"); + yrecldp->nshapeq = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NSHAPEQ"); + yrecldp->nbeta = serialboxMetainfoGetFloat64(metainfo, "YRECLDP_NBETA"); + + serialboxSerializerDestroySavepointVector(savepoints, 1); + serialboxSerializerDestroy(serializer); +} + + + +/* Read reference result into memory */ +void load_reference(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) +{ + serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "reference", "Binary"); + serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); + serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); + serialboxSavepoint_t* savepoint = savepoints[0]; + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + + load_and_expand_2d(serializer, savepoint, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(serializer, savepoint, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); + load_and_expand_1d(serializer, savepoint, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); + load_and_expand_2d(serializer, savepoint, "PFSQLF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqlf); + load_and_expand_2d(serializer, savepoint, "PFSQIF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqif); + load_and_expand_2d(serializer, savepoint, "PFCQLNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqlng); + load_and_expand_2d(serializer, savepoint, "PFCQNNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqnng); + load_and_expand_2d(serializer, savepoint, "PFSQRF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqrf); + load_and_expand_2d(serializer, savepoint, "PFSQSF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqsf); + load_and_expand_2d(serializer, savepoint, "PFCQRNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqrng); + load_and_expand_2d(serializer, savepoint, "PFCQSNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqsng); + load_and_expand_2d(serializer, savepoint, "PFSQLTUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqltur); + load_and_expand_2d(serializer, savepoint, "PFSQITUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqitur); + load_and_expand_2d(serializer, savepoint, "PFPLSL", nlon, nlev+1, nproma, ngptot, nblocks, pfplsl); + load_and_expand_2d(serializer, savepoint, "PFPLSN", nlon, nlev+1, nproma, ngptot, nblocks, pfplsn); + load_and_expand_2d(serializer, savepoint, "PFHPSL", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsl); + load_and_expand_2d(serializer, savepoint, "PFHPSN", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsn); + load_and_expand_2d(serializer, savepoint, "TENDENCY_LOC_T", nlon, nlev, nproma, ngptot, nblocks, tend_loc_t); + load_and_expand_2d(serializer, savepoint, "TENDENCY_LOC_Q", nlon, nlev, nproma, ngptot, nblocks, tend_loc_q); + load_and_expand_2d(serializer, savepoint, "TENDENCY_LOC_A", nlon, nlev, nproma, ngptot, nblocks, tend_loc_a); + load_and_expand_3d(serializer, savepoint, "TENDENCY_LOC_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_loc_cld); + + serialboxSerializerDestroySavepointVector(savepoints, 1); + serialboxSerializerDestroy(serializer); +} diff --git a/src/cloudsc_sycl/cloudsc/load_state.h b/src/cloudsc_sycl/cloudsc/load_state.h new file mode 100644 index 00000000..65fbf8c2 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/load_state.h @@ -0,0 +1,40 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include +#include "yoecldp_c.h" + +struct TECLDP ; + +void query_state(int *klon, int *klev); + +void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double* ptsphy, double* plcrit_aer, double* picrit_aer, + double* pre_ice, double* pccn, double* pnice, double* pt, double* pq, + double* tend_cml_t, double* tend_cml_q, double* tend_cml_a, double* tend_cml_cld, + double* tend_tmp_t, double* tend_tmp_q, double* tend_tmp_a, double* tend_tmp_cld, + double* pvfa, double* pvfl, double* pvfi, double* pdyna, double* pdynl, double* pdyni, + double* phrsw, double* phrlw, double* pvervel, double* pap, double* paph, double* plsm, + int* ktype, double* plu, double* plude, double* psnde, double* pmfu, + double* pmfd, double* pa, double* pclv, double* psupsat, struct TECLDP* yrecldp, + double* rg, double* rd, double* rcpd, double* retv, double* rlvtt, double* rlstt, + double* rlmlt, double* rtt, double* rv, double* r2es, double* r3les, double* r3ies, + double* r4les, double* r4ies, double* r5les, double* r5ies, double* r5alvcp, double* r5alscp, + double* ralvdcp, double* ralsdcp, double* ralfdcp, double* rtwat, + double* rtice, double* rticecu, double* rtwat_rtice_r, double *rtwat_rticecu_r, + double* rkoop1, double* rkoop2); + + +void load_reference(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, + double *pfcqlng, double *pfcqnng, double *pfsqrf, double *pfsqsf, double *pfcqrng, double *pfcqsng, + double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, + double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld); diff --git a/src/cloudsc_sycl/cloudsc/mycpu.cpp b/src/cloudsc_sycl/cloudsc/mycpu.cpp new file mode 100644 index 00000000..8c6e8506 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/mycpu.cpp @@ -0,0 +1,31 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#if defined(__APPLE__) +static int sched_getcpu() { return 0; } +#else +#include +#endif + +/* + * Find the core the thread belongs to + */ + +int mycpu_ () +{ + /* int sched_getcpu(void); */ + int cpu; +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wimplicit-function-declaration" + cpu = sched_getcpu(); +#pragma clang diagnostic pop + return cpu; +} +int mycpu() { return mycpu_(); } diff --git a/src/cloudsc_sycl/cloudsc/mycpu.h b/src/cloudsc_sycl/cloudsc/mycpu.h new file mode 100644 index 00000000..6b26848e --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/mycpu.h @@ -0,0 +1,11 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +int mycpu (); diff --git a/src/cloudsc_sycl/cloudsc/yoecldp_c.h b/src/cloudsc_sycl/cloudsc/yoecldp_c.h new file mode 100644 index 00000000..7fcace99 --- /dev/null +++ b/src/cloudsc_sycl/cloudsc/yoecldp_c.h @@ -0,0 +1,145 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#ifndef YOECLDP_H +#define YOECLDP_H + +//int nclv; // number of microphysics variables +//int ncldql; // liquid cloud water +//int ncldqi; // ice cloud water +//int ncldqr; // rain water +//int ncldqs; // snow +//int ncldqv; // vapour + +struct TECLDP { + double ramid; + double rcldiff; + double rcldiff_convi; + double rclcrit; + double rclcrit_sea; + double rclcrit_land; + double rkconv; + double rprc1; + double rprc2; + double rcldmax; + double rpecons; + double rvrfactor; + double rprecrhmax; + double rtaumel; + double ramin; + double rlmin; + double rkooptau; + double rcldtopp; + double rlcritsnow; + double rsnowlin1; + double rsnowlin2; + double ricehi1; + double ricehi2; + double riceinit; + double rvice; + double rvrain; + double rvsnow; + double rthomo; + double rcovpmin; + double rccn; + double rnice; + double rccnom; + double rccnss; + double rccnsu; + double rcldtopcf; + double rdepliqrefrate; + double rdepliqrefdepth; + double rcl_kkaac; + double rcl_kkbac; + double rcl_kkaau; + double rcl_kkbauq; + double rcl_kkbaun; + double rcl_kk_cloud_num_sea; + double rcl_kk_cloud_num_land; + double rcl_ai; + double rcl_bi; + double rcl_ci; + double rcl_di; + double rcl_x1i; + double rcl_x2i; + double rcl_x3i; + double rcl_x4i; + double rcl_const1i; + double rcl_const2i; + double rcl_const3i; + double rcl_const4i; + double rcl_const5i; + double rcl_const6i; + double rcl_apb1; + double rcl_apb2; + double rcl_apb3; + double rcl_as; + double rcl_bs; + double rcl_cs; + double rcl_ds; + double rcl_x1s; + double rcl_x2s; + double rcl_x3s; + double rcl_x4s; + double rcl_const1s; + double rcl_const2s; + double rcl_const3s; + double rcl_const4s; + double rcl_const5s; + double rcl_const6s; + double rcl_const7s; + double rcl_const8s; + double rdenswat; + double rdensref; + double rcl_ar; + double rcl_br; + double rcl_cr; + double rcl_dr; + double rcl_x1r; + double rcl_x2r; + double rcl_x4r; + double rcl_ka273; + double rcl_cdenom1; + double rcl_cdenom2; + double rcl_cdenom3; + double rcl_schmidt; + double rcl_dynvisc; + double rcl_const1r; + double rcl_const2r; + double rcl_const3r; + double rcl_const4r; + double rcl_fac1; + double rcl_fac2; + double rcl_const5r; + double rcl_const6r; + double rcl_fzrab; + double rcl_fzrbb; + int lcldextra, lcldbudget; + int nssopt; + int ncldtop; + int naeclbc, naecldu, naeclom, naeclss, naeclsu; + int nclddiag; + int naercld; + int laerliqautolsp; + int laerliqautocp; + int laerliqautocpb; + int laerliqcoll; + int laericesed; + int laericeauto; + double nshapep; + double nshapeq; + int nbeta; + //double rbeta[0][100]; + //double rbetap1[0][100]; +} ; + +//struct TECLDP *yrecldp; + +#endif diff --git a/src/cloudsc_sycl/dwarf_cloudsc.cpp b/src/cloudsc_sycl/dwarf_cloudsc.cpp new file mode 100644 index 00000000..43b91b84 --- /dev/null +++ b/src/cloudsc_sycl/dwarf_cloudsc.cpp @@ -0,0 +1,44 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "cloudsc_driver.h" + + +int main( int argc, char *argv[] ) { + + int omp_threads, ngptot, nproma; + int return_code; + + return_code = 0; + + // default values + omp_threads = 1; + ngptot = 100; + nproma = 4; + + if (argc == 1) { + cloudsc_driver(omp_threads, ngptot, nproma); + } + else if (argc == 4) { + omp_threads = atoi( argv[1] ); + ngptot = atoi( argv[2] ); + nproma = atoi( argv[3] ); + cloudsc_driver(omp_threads, ngptot, nproma); + } + else { + printf("Calling c-cloudsc with the right number of arguments will work better ;-) \n",argc); + return_code = EXIT_FAILURE; + } + + return return_code; +} From 698691161bc7b6567a53ba364230bd9fe770a5ac Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Mon, 15 Jan 2024 15:33:47 +0000 Subject: [PATCH 118/174] Fixed description and added SYCL to CMakeLists.txt --- CMakeLists.txt | 3 +++ bundle.yml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee1533a9..804dde7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,9 @@ if ( HAVE_HIP ) find_package(hip REQUIRED) endif() +ecbuild_add_option( FEATURE SYCL + DESCRIPTION "SYCL" DEFAULT OFF) + ### OpenMP ecbuild_add_option( FEATURE OMP DESCRIPTION "OpenMP" DEFAULT ON diff --git a/bundle.yml b/bundle.yml index 35f65f18..6e534a0a 100644 --- a/bundle.yml +++ b/bundle.yml @@ -116,7 +116,7 @@ options : ENABLE_HIP=ON - with-sycl : - help: Enable GPU kernel variant based on HIP + help: Enable GPU kernel variant based on SYCL cmake: > ENABLE_SYCL=ON From 632126257e0621f170cc41bd4b094b0fca60f419 Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Tue, 16 Jan 2024 16:54:57 +0200 Subject: [PATCH 119/174] Updating/fixing lumi cray-gpu arch/toolchain file for cce 16 --- arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh | 21 +++++++++---------- .../lumi/cray-gpu/16.0.1/toolchain.cmake | 17 ++++++++------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh index 8d9504eb..ed540848 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh @@ -24,19 +24,18 @@ module_unload() { module reset # Load modules -module_load PrgEnv-cray/8.3.3 -module_load LUMI/23.03 -module_load rocm/5.2.3 +module_load CrayEnv +module_load PrgEnv-cray/8.4.0 module_load cce/16.0.1 -module_load cray-libsci/22.08.1.1 -module_load cray-mpich/8.1.18 -module_load craype/2.7.23 +module_load craype-x86-trento +module_load cray-mpich/8.1.27 +module_load craype-network-ofi module_load craype-accel-amd-gfx90a -module_load buildtools/23.03 -module_load cray-hdf5/1.12.1.5 -module_load cray-python/3.9.12.1 -module_load Boost/1.81.0-cpeCray-23.03 -module_load partition/G +module_load cray-python/3.10.10 +module_load rocm/5.2.3 +module_load buildtools/22.12 +module_load LUMI/23.09 +module_load Boost/1.82.0-cpeCray-23.09 module list diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake index 63de4a48..637f2e5d 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake @@ -17,18 +17,21 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) # OpenMP FLAGS #################################################################### -set( ENABLE_OMP ON CACHE STRING "" ) -set( OpenMP_C_FLAGS "-homp" CACHE STRING "" ) -set( OpenMP_Fortran_FLAGS "-homp -hnoacc -hlist=aimd -maxrregcount 64" CACHE STRING "" ) +set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_C_LIB_NAMES "craymp" CACHE STRING "" ) +set( OpenMP_CXX_LIB_NAMES "craymp" CACHE STRING "" ) +set( OpenMP_Fortran_LIB_NAMES "craymp" CACHE STRING "" ) +set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/16.0.1/cce/x86_64/lib/libcraymp.so" CACHE STRING "" ) #################################################################### # OpenACC FLAGS #################################################################### -set( ENABLE_ACC ON CACHE STRING "" ) -set( OpenACC_C_FLAGS "-hacc" ) -set( OpenACC_CXX_FLAGS "-hacc" ) -set( OpenACC_Fortran_FLAGS "-hacc") # -h acc_model=deep_copy" ) +set( OpenACC_C_FLAGS "-hacc" CACHE STRING "" ) +set( OpenACC_CXX_FLAGS "-hacc" CACHE STRING "" ) +set( OpenACC_Fortran_FLAGS "-hacc" CACHE STRING "" ) #################################################################### # Compiler FLAGS From b62101cb99ca16d83a646015679c30fcb152b76d Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Tue, 16 Jan 2024 17:11:59 +0200 Subject: [PATCH 120/174] adding col/s as metric for CUDA variants --- src/cloudsc_cuda/cloudsc/cloudsc_driver.cu | 18 +++++++++++------- .../cloudsc/cloudsc_driver_hoist.cu | 18 +++++++++++------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu b/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu index d715aa68..5ac91de2 100644 --- a/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu +++ b/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu @@ -456,9 +456,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { double t2 = omp_get_wtime(); printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); - printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", - "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); - double zfrac, zmflops; + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; for (int t = 0; t < numthreads; t++) { const double tloc = zinfo[0][t]; const int coreid = (int) zinfo[1][t]; @@ -467,21 +467,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zfrac = (double)igpc / (double)numcols; if (tloc > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; } else { zmflops = 0.; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", - numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); } double tdiff = t2 - t1; zfrac = 1.0; if (tdiff > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; } else { zmflops = 0.0; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", - numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, diff --git a/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu b/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu index 9d7d615e..615cdf25 100644 --- a/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu +++ b/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu @@ -497,9 +497,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { double t2 = omp_get_wtime(); printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks); - printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n", - "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s"); - double zfrac, zmflops; + printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n", + "NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s"); + double zfrac, zmflops, zthrput; for (int t = 0; t < numthreads; t++) { const double tloc = zinfo[0][t]; const int coreid = (int) zinfo[1][t]; @@ -508,21 +508,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zfrac = (double)igpc / (double)numcols; if (tloc > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc; + zthrput = (double)numcols/tloc; } else { zmflops = 0.; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n", - numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n", + numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput); } double tdiff = t2 - t1; zfrac = 1.0; if (tdiff > 0.0) { zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff; + zthrput = (double)numcols/tdiff; } else { zmflops = 0.0; + zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n", - numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops); + printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif, From 50bcf3f2e5d5387d0a163b753e622170bd3ed803 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Wed, 17 Jan 2024 12:11:24 +0000 Subject: [PATCH 121/174] fix load_state for SYCL variant(s) --- src/cloudsc_sycl/cloudsc/load_state.cpp | 130 ++++++------------------ 1 file changed, 31 insertions(+), 99 deletions(-) diff --git a/src/cloudsc_sycl/cloudsc/load_state.cpp b/src/cloudsc_sycl/cloudsc/load_state.cpp index 610cf8f4..5ba60abf 100644 --- a/src/cloudsc_sycl/cloudsc/load_state.cpp +++ b/src/cloudsc_sycl/cloudsc/load_state.cpp @@ -1,14 +1,6 @@ -/* - * (C) Copyright 1988- ECMWF. - * - * This software is licensed under the terms of the Apache Licence Version 2.0 - * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. - * In applying this licence, ECMWF does not waive the privileges and immunities - * granted to it by virtue of its status as an intergovernmental organisation - * nor does it submit to any jurisdiction. - */ - #include "load_state.h" +//#include "yomcst_c.hpp" +#include #include #include "serialbox-c/Serialbox.h" @@ -16,7 +8,6 @@ #define min(a, b) (((a) < (b)) ? (a) : (b)) #define max(a, b) (((a) > (b)) ? (a) : (b)) - /* Query sizes and dimensions of state arrays */ void query_state(int *klon, int *klev) { @@ -32,121 +23,63 @@ void query_state(int *klon, int *klev) void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) { - int gidx, bsize, bidx, bend, fsize, b, l, i; + int b, i, buf_start_idx, buf_idx; - #pragma omp parallel for default(shared) private(gidx, bsize, bidx, bend, fsize, b, l, i) +#pragma omp parallel for default(shared) private(b, i, buf_start_idx, buf_idx) for (b = 0; b < nblocks; b++) { - gidx = b*nproma; // Global starting index of the block in the general domain - bsize = min(nproma, ngptot - gidx); // Size of the field block - bidx = gidx % nlon; // Rolling index into the input buffer - bend = min(bidx+bsize, nlon); // Idealised final index in the input buffer - - if (bend-bidx < bsize) { - // The input buffer does not hold enough data to fill field block; - // we need to fill the rest of the block with data from front of buffer. - fsize = nlon - bidx; - for (i = 0; i < fsize; i++) { field_in[b*nproma+i] = buffer[bidx + i]; } - for (i = 0; i < bsize-fsize; i++) { field_in[b*nproma+fsize+i] = buffer[i]; } - } else { - // Simply copy a block of data from the rolling buffer index - for (i = 0; i < bsize; i++) { field_in[b*nproma+i] = buffer[bidx+i]; } + buf_start_idx = ((b)*nproma) % nlon; + for (i = 0; i < nproma; i++) { + buf_idx = (buf_start_idx + i) % nlon; + field_in[b*nproma+i] = buffer[buf_idx]; } - // Zero out the remainder of last block - for (i=bsize; i Date: Wed, 17 Jan 2024 12:12:17 +0000 Subject: [PATCH 122/174] remove unused 'out_stream' from SYCL variants (which improves the performance as well) --- src/cloudsc_sycl/cloudsc/cloudsc_c.kernel | 2 +- src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel | 2 +- src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel | 2 +- src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp | 3 +-- src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp | 3 +-- src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp | 3 +-- 6 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel b/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel index cb0926bd..ab6d96a8 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel +++ b/src/cloudsc_sycl/cloudsc/cloudsc_c.kernel @@ -39,7 +39,7 @@ void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, double rkoop1, double rkoop2, - cl::sycl:: stream out_stream, cl::sycl::nd_item<1> item_ct) { + cl::sycl::nd_item<1> item_ct) { //------------------------------------------------------------------------------- // Declare input/output arguments diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel b/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel index 159a02f4..a2b55bc7 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel +++ b/src/cloudsc_sycl/cloudsc/cloudsc_c_hoist.kernel @@ -45,7 +45,7 @@ void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, double * __restrict__ zpfplsx, double * __restrict__ zlneg, double * __restrict__ zqxn2d, double * __restrict__ zqsmix, double * __restrict__ zqsliq, double * __restrict__ zqsice, double * __restrict__ zfoeewmt, double * __restrict__ zfoeew, double * __restrict__ zfoeeliqt, - cl::sycl:: stream out_stream, cl::sycl::nd_item<1> item_ct) { + cl::sycl::nd_item<1> item_ct) { //------------------------------------------------------------------------------- // Declare input/output arguments diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel b/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel index 4af9ebd3..269013dc 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel +++ b/src/cloudsc_sycl/cloudsc/cloudsc_c_k_caching.kernel @@ -39,7 +39,7 @@ void cloudsc_c(int kidia, int kfdia, int klon, double ptsphy, double r5ies, double r5alvcp, double r5alscp, double ralvdcp, double ralsdcp, double ralfdcp, double rtwat, double rtice, double rticecu, double rtwat_rtice_r, double rtwat_rticecu_r, double rkoop1, double rkoop2, - cl::sycl:: stream out_stream, cl::sycl::nd_item<1> item_ct) { + cl::sycl::nd_item<1> item_ct) { //------------------------------------------------------------------------------- // Declare input/output arguments diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp index 8e3f7d91..c5702202 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver.cpp @@ -398,7 +398,6 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { cl::sycl::range<1> local(nproma); q.submit([&](cl::sycl::handler &h) { - cl::sycl::stream out_stream(16384, 512, h); h.parallel_for( cl::sycl::nd_range<1>( global, local), [=] (cl::sycl::nd_item<1> item_ct1) { cloudsc_c(1, icend, nproma, ptsphy, d_pt, d_pq, @@ -420,7 +419,7 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { rv, r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, - rkoop1, rkoop2, out_stream, item_ct1); + rkoop1, rkoop2, item_ct1); }); }); diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp index eb00d4f0..817e9ba1 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver_hoist.cpp @@ -431,7 +431,6 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { cl::sycl::range<1> local(nproma); q.submit([&](cl::sycl::handler &h) { - cl::sycl::stream out_stream(16384, 512, h); h.parallel_for( cl::sycl::nd_range<1>( global, local), [=] (cl::sycl::nd_item<1> item_ct1) { cloudsc_c(1, icend, nproma, ptsphy, d_pt, d_pq, @@ -460,7 +459,7 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { d_zpfplsx, d_zlneg, d_zqxn2d, d_zqsmix, d_zqsliq, d_zqsice, d_zfoeewmt, d_zfoeew, d_zfoeeliqt, - out_stream, item_ct1); + item_ct1); }); diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp index c191d721..9c5d17e4 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp +++ b/src/cloudsc_sycl/cloudsc/cloudsc_driver_k_caching.cpp @@ -396,7 +396,6 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { cl::sycl::range<1> local(nproma); q.submit([&](cl::sycl::handler &h) { - cl::sycl::stream out_stream(16384, 512, h); h.parallel_for( cl::sycl::nd_range<1>( global, local), [=] (cl::sycl::nd_item<1> item_ct1) { cloudsc_c(1, icend, nproma, ptsphy, d_pt, d_pq, @@ -418,7 +417,7 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { rv, r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, - rkoop1, rkoop2, out_stream, item_ct1); + rkoop1, rkoop2, item_ct1); }); }); From 82df8d3e4509f851505cf2c4c6c66da8e564a036 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Wed, 17 Jan 2024 13:00:51 +0000 Subject: [PATCH 123/174] add missing copyright header cloudsc_sycl/load_state.cpp --- src/cloudsc_sycl/cloudsc/load_state.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cloudsc_sycl/cloudsc/load_state.cpp b/src/cloudsc_sycl/cloudsc/load_state.cpp index 5ba60abf..034270ca 100644 --- a/src/cloudsc_sycl/cloudsc/load_state.cpp +++ b/src/cloudsc_sycl/cloudsc/load_state.cpp @@ -1,5 +1,14 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + #include "load_state.h" -//#include "yomcst_c.hpp" #include #include From aa802cc50055b081f16f638be4612a1779dc9e11 Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Wed, 17 Jan 2024 15:04:07 +0200 Subject: [PATCH 124/174] add missing copyright header cloudsc_hip/load_state.cpp --- src/cloudsc_hip/cloudsc/load_state.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cloudsc_hip/cloudsc/load_state.cpp b/src/cloudsc_hip/cloudsc/load_state.cpp index 5ba60abf..034270ca 100644 --- a/src/cloudsc_hip/cloudsc/load_state.cpp +++ b/src/cloudsc_hip/cloudsc/load_state.cpp @@ -1,5 +1,14 @@ +/* + * (C) Copyright 1988- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + #include "load_state.h" -//#include "yomcst_c.hpp" #include #include From d51c83e867db1f3f5b6dcf18c1d94657d0fc19b7 Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Thu, 18 Jan 2024 12:32:17 +0200 Subject: [PATCH 125/174] fix arch/env file for pragma-based offload and usage without serialbox (LUMI CCE 16) --- arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh | 1 + arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake | 7 +++++-- cmake/features/OMP.cmake | 12 ++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh index ed540848..d991f55f 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh @@ -36,6 +36,7 @@ module_load rocm/5.2.3 module_load buildtools/22.12 module_load LUMI/23.09 module_load Boost/1.82.0-cpeCray-23.09 +module_load cray-hdf5/1.12.2.3 module list diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake index 637f2e5d..272ab042 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake @@ -19,7 +19,7 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) -set( OpenMP_Fortran_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_Fortran_FLAGS "-homp -hlist=aimd" CACHE STRING "" ) set( OpenMP_C_LIB_NAMES "craymp" CACHE STRING "" ) set( OpenMP_CXX_LIB_NAMES "craymp" CACHE STRING "" ) set( OpenMP_Fortran_LIB_NAMES "craymp" CACHE STRING "" ) @@ -42,5 +42,8 @@ set(ECBUILD_Fortran_FLAGS "-hcontiguous") set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") -# set(ECBUILD_Fortran_FLAGS_BIT "-O3 -G2 -haggress -DNDEBUG") set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") + +set( GPU_TARGETS "gfx90a" CACHE STRING "" ) +# select OpenMP pragma to be used +set( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "" ) diff --git a/cmake/features/OMP.cmake b/cmake/features/OMP.cmake index 7cd49da8..d938e882 100644 --- a/cmake/features/OMP.cmake +++ b/cmake/features/OMP.cmake @@ -1,5 +1,7 @@ if( HAVE_OMP ) + if( NOT DEFINED HAVE_OMP_TARGET_TEAMS_DISTRIBUTE ) + try_compile( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE ${CMAKE_CURRENT_BINARY_DIR} @@ -11,6 +13,10 @@ if( HAVE_OMP ) ecbuild_debug_var( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE ) ecbuild_debug_var( _HAVE_OMP_TARGET_TEAMS_DISTRIBUTE_OUTPUT ) + endif() + + if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL ) + try_compile( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL ${CMAKE_CURRENT_BINARY_DIR} @@ -22,6 +28,10 @@ if( HAVE_OMP ) ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL ) ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL_OUTPUT ) + endif() + + if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ) + try_compile( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ${CMAKE_CURRENT_BINARY_DIR} @@ -32,6 +42,8 @@ if( HAVE_OMP ) ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ) ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD_OUTPUT ) + + endif() if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OR HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD ) set( HAVE_OMP_TARGET_LOOP_CONSTRUCT ON CACHE BOOL "OpenMP target teams loop is supported" ) From 3c5776f398d523950bdef1991f41ebde9fd0d77a Mon Sep 17 00:00:00 2001 From: Michael Staneker <50531288+MichaelSt98@users.noreply.github.com> Date: Thu, 18 Jan 2024 15:37:15 +0100 Subject: [PATCH 126/174] update arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh Co-authored-by: Balthasar Reuter <6384870+reuterbal@users.noreply.github.com> --- arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh index d991f55f..663721c8 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh @@ -24,19 +24,19 @@ module_unload() { module reset # Load modules -module_load CrayEnv +module_load LUMI/23.09 +module_load partition/G module_load PrgEnv-cray/8.4.0 module_load cce/16.0.1 -module_load craype-x86-trento module_load cray-mpich/8.1.27 module_load craype-network-ofi -module_load craype-accel-amd-gfx90a -module_load cray-python/3.10.10 module_load rocm/5.2.3 -module_load buildtools/22.12 -module_load LUMI/23.09 +module_load buildtools/23.09 module_load Boost/1.82.0-cpeCray-23.09 -module_load cray-hdf5/1.12.2.3 +module_load cray-python/3.10.10 +module_load cray-hdf5/1.12.2.7 +module_load craype-x86-trento +module_load craype-accel-amd-gfx90a module list From 22a11a8be0dac8b4124505bb4bd34e07b73c0bab Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 18 Jan 2024 15:10:53 +0000 Subject: [PATCH 127/174] SYCL: Add SYCL env/toolchain for custom DPCPP install with CUDA --- arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh | 48 +++++ .../intel-sycl/2021.4.0/toolchain.cmake | 1 + .../toolchains/ecmwf-hpc2020-intel-sycl.cmake | 164 ++++++++++++++++++ bundle.yml | 1 + 4 files changed, 214 insertions(+) create mode 100644 arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh create mode 120000 arch/ecmwf/hpc2020/intel-sycl/2021.4.0/toolchain.cmake create mode 100644 arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake diff --git a/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh b/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh new file mode 100644 index 00000000..6da32d30 --- /dev/null +++ b/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh @@ -0,0 +1,48 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +# Source me to get the correct configure/build/run environment + +# Store tracing and disable (module is *way* too verbose) +{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null + +module_load() { + echo "+ module load $1" + module load $1 +} +module_unload() { + echo "+ module unload $1" + module unload $1 +} + +# Unload all modules to be certain +module_unload intel +module_unload openmpi +module_unload hpcx-openmpi +module_unload boost +module_unload hdf5 +module_unload cmake +module_unload python3 +module_unload java + +# Load modules +module_load prgenv/intel +module_load intel/2021.4.0 +module_load hpcx-openmpi/2.10.0 +module_load boost/1.71.0 +module_load hdf5/1.10.6 +module_load cmake/3.20.2 +module_load python3/3.8.8-01 +module_load java/11.0.6 + +set -x + +# Restore tracing to stored setting +{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null + +export ECBUILD_TOOLCHAIN="./toolchain.cmake" diff --git a/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/toolchain.cmake b/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/toolchain.cmake new file mode 120000 index 00000000..8af2b39b --- /dev/null +++ b/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/toolchain.cmake @@ -0,0 +1 @@ +../../../../toolchains/ecmwf-hpc2020-intel-sycl.cmake \ No newline at end of file diff --git a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake new file mode 100644 index 00000000..84a1aefb --- /dev/null +++ b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake @@ -0,0 +1,164 @@ +# (C) Copyright 1988- ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. + +#################################################################### +# ARCHITECTURE +#################################################################### + +set( EC_HAVE_C_INLINE 1 ) +set( EC_HAVE_FUNCTION_DEF 1 ) +set( EC_HAVE_CXXABI_H 1 ) +set( EC_HAVE_CXX_BOOL 1 ) +set( EC_HAVE_CXX_SSTREAM 1 ) +set( EC_HAVE_CXX_INT_128 0 ) +set( CMAKE_SIZEOF_VOID_P 8 ) +set( EC_SIZEOF_PTR 8 ) +set( EC_SIZEOF_CHAR 1 ) +set( EC_SIZEOF_SHORT 2 ) +set( EC_SIZEOF_INT 4 ) +set( EC_SIZEOF_LONG 8 ) +set( EC_SIZEOF_LONG_LONG 8 ) +set( EC_SIZEOF_FLOAT 4 ) +set( EC_SIZEOF_DOUBLE 8 ) +set( EC_SIZEOF_LONG_DOUBLE 8 ) +set( EC_SIZEOF_SIZE_T 8 ) +set( EC_SIZEOF_SSIZE_T 8 ) +set( EC_SIZEOF_OFF_T 8 ) +set( EC_BIG_ENDIAN 0 ) +set( EC_LITTLE_ENDIAN 1 ) +set( IEEE_BE 0 ) +set( IEEE_LE 1 ) +set( EC_HAVE_FSEEK 1 ) +set( EC_HAVE_FSEEKO 1 ) +set( EC_HAVE_FTELLO 1 ) +set( EC_HAVE_LSEEK 0 ) +set( EC_HAVE_FTRUNCATE 0 ) +set( EC_HAVE_OPEN 0 ) +set( EC_HAVE_FOPEN 1 ) +set( EC_HAVE_FMEMOPEN 1 ) +set( EC_HAVE_FUNOPEN 0 ) +set( EC_HAVE_FLOCK 1 ) +set( EC_HAVE_MMAP 1 ) +set( EC_HAVE_POSIX_MEMALIGN 1 ) +set( EC_HAVE_F_GETLK 1 ) +set( EC_HAVE_F_SETLK 1 ) +set( EC_HAVE_F_SETLKW 1 ) +set( EC_HAVE_F_GETLK64 1 ) +set( EC_HAVE_F_SETLK64 1 ) +set( EC_HAVE_F_SETLKW64 1 ) +set( EC_HAVE_MAP_ANONYMOUS 1 ) +set( EC_HAVE_MAP_ANON 1 ) +set( EC_HAVE_ASSERT_H 1 ) +set( EC_HAVE_STDLIB_H 1 ) +set( EC_HAVE_UNISTD_H 1 ) +set( EC_HAVE_STRING_H 1 ) +set( EC_HAVE_STRINGS_H 1 ) +set( EC_HAVE_SYS_STAT_H 1 ) +set( EC_HAVE_SYS_TIME_H 1 ) +set( EC_HAVE_SYS_TYPES_H 1 ) +set( EC_HAVE_MALLOC_H 1 ) +set( EC_HAVE_SYS_MALLOC_H 0 ) +set( EC_HAVE_SYS_PARAM_H 1 ) +set( EC_HAVE_SYS_MOUNT_H 1 ) +set( EC_HAVE_SYS_VFS_H 1 ) +set( EC_HAVE_OFFT 1 ) +set( EC_HAVE_OFF64T 1 ) +set( EC_HAVE_STRUCT_STAT 1 ) +set( EC_HAVE_STRUCT_STAT64 1 ) +set( EC_HAVE_STAT 1 ) +set( EC_HAVE_STAT64 1 ) +set( EC_HAVE_FSTAT 1 ) +set( EC_HAVE_FSTAT64 1 ) +set( EC_HAVE_FSEEKO64 1 ) +set( EC_HAVE_FTELLO64 1 ) +set( EC_HAVE_LSEEK64 1 ) +set( EC_HAVE_OPEN64 1 ) +set( EC_HAVE_FOPEN64 1 ) +set( EC_HAVE_FTRUNCATE64 1 ) +set( EC_HAVE_FLOCK64 1 ) +set( EC_HAVE_MMAP64 1 ) +set( EC_HAVE_STRUCT_STATVFS 1 ) +set( EC_HAVE_STRUCT_STATVFS64 1 ) +set( EC_HAVE_FOPENCOOKIE 1 ) +set( EC_HAVE_FSYNC 1 ) +set( EC_HAVE_FDATASYNC 1 ) +set( EC_HAVE_DIRFD 1 ) +set( EC_HAVE_SYSPROC 0 ) +set( EC_HAVE_SYSPROCFS 1 ) +set( EC_HAVE_EXECINFO_BACKTRACE 1 ) +set( EC_HAVE_GMTIME_R 1 ) +set( EC_HAVE_GETPWUID_R 1 ) +set( EC_HAVE_GETPWNAM_R 1 ) +set( EC_HAVE_READDIR_R 1 ) +set( EC_HAVE_DIRENT_D_TYPE 1 ) +set( EC_HAVE_GETHOSTBYNAME_R 1 ) +set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 ) +set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 ) +set( EC_HAVE_PROCFS 1 ) +set( EC_HAVE_DLFCN_H 1 ) +set( EC_HAVE_DLADDR 1 ) +set( EC_HAVE_AIOCB 1 ) +set( EC_HAVE_AIOCB64 1 ) + +# Disable relative rpaths as aprun does not respect it +set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE ) + +#################################################################### +# COMPILER +#################################################################### + +set( ECBUILD_FIND_MPI ON ) +set( ECBUILD_TRUST_FLAGS ON ) + +#################################################################### +# Compiler FLAGS +#################################################################### + +# General Flags (add to default) + +set(ECBUILD_Fortran_FLAGS "-g") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -qopenmp-threadprivate compat") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume byterecl") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -convert big_endian") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -traceback") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -align array64byte") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -warn nounused,nouncalled") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -march=core-avx2") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-functions") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -finline-limit=1500") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Winline") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -no-fma") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -assume realloc_lhs") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-model precise") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -ftz") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fp-speculation=safe") +set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -fast-transcendentals") + +#################################################################### +# Additional compiler flags for SYCL offload via CUDA backend +#################################################################### + +# Additional Intel DPCPP compiler for SYCL offload +set(CMAKE_CXX_COMPILER "/home/nams/opt/dpcpp/bin/clang++") + +# Initial set of flags to things going with a custom DPCPP install on AC +set(CMAKE_CXX_FLAGS "-O3 -L/home/nams/opt/dpcpp/lib -fopenmp -lstdc++") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-early-optimizations -fsycl") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xcuda-ptxas --maxrregcount=128") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --cuda-gpu-arch=sm_80") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/apps/intel/2021.4.0/compiler/2021.4.0/linux/compiler/include") + +#################################################################### +# LINK FLAGS +#################################################################### + +set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr " ) +set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap" ) +set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap -Wl,--as-needed" ) +set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" ) diff --git a/bundle.yml b/bundle.yml index 6e534a0a..06f1df4e 100644 --- a/bundle.yml +++ b/bundle.yml @@ -119,6 +119,7 @@ options : help: Enable GPU kernel variant based on SYCL cmake: > ENABLE_SYCL=ON + ENABLE_CLOUDSC_SYCL=ON - with-mpi : help : Enable MPI-parallel kernel From 0196a0ab01fd3de1c4285cff96219d4aab0ef063 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 18 Jan 2024 15:51:57 +0000 Subject: [PATCH 128/174] SYCL: Add a quick README entry for the SCYL-specific env on AC --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index a217b2dd..e849a625 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,22 @@ Isambard. A set of arch and toolchain files and detailed installation and run instructions are provided [here](https://confluence.ecmwf.int/display/~nabr/3rd+Isambard+Hackathon). +### SYCL version of CLOUDSC + +A preliminary SYCL code variant has been added and tested with a custom +DPCPP install on ECMWF's AC partition. To build this, please use the +SYCL-specific environment setups: + +``` +./cloudsc-bundle build --clean --build-dir=build-sycl --with-gpu --with-sycl --with-serialbox --arch=arch/ecmwf/hpc2020/intel-sycl/2021.4.0 + +# Then run with +cd build-sycl && . env.sh +./bin/dwarf-cloudsc-scc-sycl 1 240000 128 +./bin/dwarf-cloudsc-scc-hoist-sycl 1 240000 128 +./bin/dwarf-cloudsc-scc-k-caching-sycl 1 240000 128 +``` + ## Running and testing The different prototype variants of the dwarf create different binaries that From 7f901fdbd444bc3ce5433d23e0c9984af3dacf88 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 23 Jan 2024 13:33:21 +0100 Subject: [PATCH 129/174] use correct Atlas branch for MultiField --- bundle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle.yml b/bundle.yml index f0340013..f12de8a1 100644 --- a/bundle.yml +++ b/bundle.yml @@ -61,7 +61,7 @@ projects : - atlas : git : https://github.com/ecmwf/atlas - version : 0.34.0 + version : feature/nab_atlas_MultiField optional: true require : ecbuild eckit fckit cmake : > From bf6495fe6de051ebe66447837e6ddaebe3ae8716 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 23 Jan 2024 14:26:48 +0100 Subject: [PATCH 130/174] NPROMA is not called from FunctionSpace anymore, use %SHAPE(1) instead --- src/cloudsc_fortran_atlas/validate_atlas_mod.F90 | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index d04d9233..9d852c17 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -63,7 +63,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) NLEV = FIELD%LEVELS() NGPTOT = FSPACE%SIZE() NBLOCKS = FSPACE%NBLKS() - !NPROMA = FSPACE%NPROMA() NPROMA = FIELD%SHAPE(1) ZMINVAL(1) = +HUGE(ZMINVAL(1)) @@ -73,7 +72,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) !CALL INPUT_INITIALIZE(NAME='reference') IF (FRANK == 2) THEN - CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R1) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) @@ -90,7 +89,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) ENDDO END DO ELSE IF (FRANK == 3) THEN - CALL LOAD_AND_EXPAND(NAME, REF_R3, NLON, FIELD%LEVELS(), FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME, REF_R3, NLON, FIELD%LEVELS(), NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R2) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) @@ -119,7 +118,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) IF (STATE_VAR == 'Q') THEN VAR_ID = 3 ENDIF - CALL LOAD_AND_EXPAND(NAME//'_'//STATE_VAR, REF_R3, NLON, NLEV, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME//'_'//STATE_VAR, REF_R3, NLON, NLEV, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) DO B=1, NBLOCKS @@ -137,7 +136,7 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) END DO END DO ELSE IF (STATE_VAR == 'CLD') THEN - CALL LOAD_AND_EXPAND(NAME//'_CLD', REF_R4, NLON, NLEV, NDIM, FSPACE%NPROMA(), NGPTOT, NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND(NAME//'_CLD', REF_R4, NLON, NLEV, NDIM, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) !OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(B, BSIZE) & !& REDUCTION(MIN:ZMINVAL, MAX:ZMAX_VAL_ERR, +:ZSUM_ERR_ABS) DO B=1, NBLOCKS From 5bbd42ebca2147417e62ce9be1cd9ef16b888c48 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Tue, 23 Jan 2024 15:12:37 +0100 Subject: [PATCH 131/174] the correct Atlas branch --- bundle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle.yml b/bundle.yml index f12de8a1..860ae36a 100644 --- a/bundle.yml +++ b/bundle.yml @@ -61,7 +61,7 @@ projects : - atlas : git : https://github.com/ecmwf/atlas - version : feature/nab_atlas_MultiField + version : feature/MultiField optional: true require : ecbuild eckit fckit cmake : > From ac90e14c3e212071be35099e31dc751bcd2a802c Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Wed, 24 Jan 2024 09:46:28 +0000 Subject: [PATCH 132/174] SYCL: Drop unnecessary optimisation flags --- arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake index 84a1aefb..e80ce5bf 100644 --- a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake +++ b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake @@ -150,7 +150,6 @@ set(CMAKE_CXX_COMPILER "/home/nams/opt/dpcpp/bin/clang++") set(CMAKE_CXX_FLAGS "-O3 -L/home/nams/opt/dpcpp/lib -fopenmp -lstdc++") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-early-optimizations -fsycl") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xcuda-ptxas --maxrregcount=128") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --cuda-gpu-arch=sm_80") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/apps/intel/2021.4.0/compiler/2021.4.0/linux/compiler/include") From 734b7ceb5f5831b26fc9ea419c6fe08d8f5af2ef Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Wed, 24 Jan 2024 15:41:05 +0000 Subject: [PATCH 133/174] SYCL CMake integration changes --- CMakeLists.txt | 3 +- arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh | 2 ++ bundle.yml | 1 - src/cloudsc_sycl/CMakeLists.txt | 28 +++++++++++++------ 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 804dde7a..979b39b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,8 @@ if ( HAVE_HIP ) endif() ecbuild_add_option( FEATURE SYCL - DESCRIPTION "SYCL" DEFAULT OFF) + DESCRIPTION "SYCL" DEFAULT OFF + REQUIRED_PACKAGES "IntelSYCL") ### OpenMP ecbuild_add_option( FEATURE OMP diff --git a/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh b/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh index 6da32d30..1458d433 100644 --- a/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh +++ b/arch/ecmwf/hpc2020/intel-sycl/2021.4.0/env.sh @@ -42,6 +42,8 @@ module_load java/11.0.6 set -x +export IntelSYCL_DIR="/usr/local/apps/intel/2023.2.0/compiler/2023.2.0/linux/IntelSYCL" + # Restore tracing to stored setting { if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null diff --git a/bundle.yml b/bundle.yml index 06f1df4e..6e534a0a 100644 --- a/bundle.yml +++ b/bundle.yml @@ -119,7 +119,6 @@ options : help: Enable GPU kernel variant based on SYCL cmake: > ENABLE_SYCL=ON - ENABLE_CLOUDSC_SYCL=ON - with-mpi : help : Enable MPI-parallel kernel diff --git a/src/cloudsc_sycl/CMakeLists.txt b/src/cloudsc_sycl/CMakeLists.txt index 8929a614..1399447b 100644 --- a/src/cloudsc_sycl/CMakeLists.txt +++ b/src/cloudsc_sycl/CMakeLists.txt @@ -14,11 +14,6 @@ ecbuild_add_option( FEATURE CLOUDSC_SYCL if( HAVE_CLOUDSC_SYCL ) - set(LINK_SYCL OFF) - if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") - set(LINK_SYCL ON) - endif() - ecbuild_add_library( TARGET dwarf-cloudsc-scc-sycl-lib INSTALL_HEADERS LISTED @@ -37,11 +32,16 @@ if( HAVE_CLOUDSC_SYCL ) $ $ PUBLIC_LIBS - $<$:sycl> Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C> ) + add_sycl_to_target( + TARGET + dwarf-cloudsc-scc-sycl-lib + SOURCES + cloudsc/cloudsc_c.kernel + ) ecbuild_add_executable( TARGET dwarf-cloudsc-scc-sycl @@ -77,11 +77,17 @@ if( HAVE_CLOUDSC_SYCL ) $ $ PUBLIC_LIBS - $<$:sycl> Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C> ) + add_sycl_to_target( + TARGET + dwarf-cloudsc-scc-hoist-sycl-lib + SOURCES + cloudsc/cloudsc_c_hoist.kernel + ) + ecbuild_add_executable( TARGET dwarf-cloudsc-scc-hoist-sycl SOURCES dwarf_cloudsc.cpp @@ -116,11 +122,17 @@ if( HAVE_CLOUDSC_SYCL ) $ $ PUBLIC_LIBS - $<$:sycl> Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C> ) + add_sycl_to_target( + TARGET + dwarf-cloudsc-scc-k-caching-sycl-lib + SOURCES + cloudsc/cloudsc_c_k_caching.kernel + ) + ecbuild_add_executable( TARGET dwarf-cloudsc-scc-k-caching-sycl SOURCES dwarf_cloudsc.cpp From 62dd8626b906b25a136232f511d40438c2a932d8 Mon Sep 17 00:00:00 2001 From: Michael Staneker <50531288+MichaelSt98@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:04:20 +0100 Subject: [PATCH 134/174] Remove redundant flags in intel-sycl toolchain Co-authored-by: Balthasar Reuter <6384870+reuterbal@users.noreply.github.com> --- .../toolchains/ecmwf-hpc2020-intel-sycl.cmake | 101 ------------------ 1 file changed, 101 deletions(-) diff --git a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake index e80ce5bf..7574e5e0 100644 --- a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake +++ b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake @@ -6,107 +6,6 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. -#################################################################### -# ARCHITECTURE -#################################################################### - -set( EC_HAVE_C_INLINE 1 ) -set( EC_HAVE_FUNCTION_DEF 1 ) -set( EC_HAVE_CXXABI_H 1 ) -set( EC_HAVE_CXX_BOOL 1 ) -set( EC_HAVE_CXX_SSTREAM 1 ) -set( EC_HAVE_CXX_INT_128 0 ) -set( CMAKE_SIZEOF_VOID_P 8 ) -set( EC_SIZEOF_PTR 8 ) -set( EC_SIZEOF_CHAR 1 ) -set( EC_SIZEOF_SHORT 2 ) -set( EC_SIZEOF_INT 4 ) -set( EC_SIZEOF_LONG 8 ) -set( EC_SIZEOF_LONG_LONG 8 ) -set( EC_SIZEOF_FLOAT 4 ) -set( EC_SIZEOF_DOUBLE 8 ) -set( EC_SIZEOF_LONG_DOUBLE 8 ) -set( EC_SIZEOF_SIZE_T 8 ) -set( EC_SIZEOF_SSIZE_T 8 ) -set( EC_SIZEOF_OFF_T 8 ) -set( EC_BIG_ENDIAN 0 ) -set( EC_LITTLE_ENDIAN 1 ) -set( IEEE_BE 0 ) -set( IEEE_LE 1 ) -set( EC_HAVE_FSEEK 1 ) -set( EC_HAVE_FSEEKO 1 ) -set( EC_HAVE_FTELLO 1 ) -set( EC_HAVE_LSEEK 0 ) -set( EC_HAVE_FTRUNCATE 0 ) -set( EC_HAVE_OPEN 0 ) -set( EC_HAVE_FOPEN 1 ) -set( EC_HAVE_FMEMOPEN 1 ) -set( EC_HAVE_FUNOPEN 0 ) -set( EC_HAVE_FLOCK 1 ) -set( EC_HAVE_MMAP 1 ) -set( EC_HAVE_POSIX_MEMALIGN 1 ) -set( EC_HAVE_F_GETLK 1 ) -set( EC_HAVE_F_SETLK 1 ) -set( EC_HAVE_F_SETLKW 1 ) -set( EC_HAVE_F_GETLK64 1 ) -set( EC_HAVE_F_SETLK64 1 ) -set( EC_HAVE_F_SETLKW64 1 ) -set( EC_HAVE_MAP_ANONYMOUS 1 ) -set( EC_HAVE_MAP_ANON 1 ) -set( EC_HAVE_ASSERT_H 1 ) -set( EC_HAVE_STDLIB_H 1 ) -set( EC_HAVE_UNISTD_H 1 ) -set( EC_HAVE_STRING_H 1 ) -set( EC_HAVE_STRINGS_H 1 ) -set( EC_HAVE_SYS_STAT_H 1 ) -set( EC_HAVE_SYS_TIME_H 1 ) -set( EC_HAVE_SYS_TYPES_H 1 ) -set( EC_HAVE_MALLOC_H 1 ) -set( EC_HAVE_SYS_MALLOC_H 0 ) -set( EC_HAVE_SYS_PARAM_H 1 ) -set( EC_HAVE_SYS_MOUNT_H 1 ) -set( EC_HAVE_SYS_VFS_H 1 ) -set( EC_HAVE_OFFT 1 ) -set( EC_HAVE_OFF64T 1 ) -set( EC_HAVE_STRUCT_STAT 1 ) -set( EC_HAVE_STRUCT_STAT64 1 ) -set( EC_HAVE_STAT 1 ) -set( EC_HAVE_STAT64 1 ) -set( EC_HAVE_FSTAT 1 ) -set( EC_HAVE_FSTAT64 1 ) -set( EC_HAVE_FSEEKO64 1 ) -set( EC_HAVE_FTELLO64 1 ) -set( EC_HAVE_LSEEK64 1 ) -set( EC_HAVE_OPEN64 1 ) -set( EC_HAVE_FOPEN64 1 ) -set( EC_HAVE_FTRUNCATE64 1 ) -set( EC_HAVE_FLOCK64 1 ) -set( EC_HAVE_MMAP64 1 ) -set( EC_HAVE_STRUCT_STATVFS 1 ) -set( EC_HAVE_STRUCT_STATVFS64 1 ) -set( EC_HAVE_FOPENCOOKIE 1 ) -set( EC_HAVE_FSYNC 1 ) -set( EC_HAVE_FDATASYNC 1 ) -set( EC_HAVE_DIRFD 1 ) -set( EC_HAVE_SYSPROC 0 ) -set( EC_HAVE_SYSPROCFS 1 ) -set( EC_HAVE_EXECINFO_BACKTRACE 1 ) -set( EC_HAVE_GMTIME_R 1 ) -set( EC_HAVE_GETPWUID_R 1 ) -set( EC_HAVE_GETPWNAM_R 1 ) -set( EC_HAVE_READDIR_R 1 ) -set( EC_HAVE_DIRENT_D_TYPE 1 ) -set( EC_HAVE_GETHOSTBYNAME_R 1 ) -set( EC_HAVE_ATTRIBUTE_CONSTRUCTOR 1 ) -set( EC_ATTRIBUTE_CONSTRUCTOR_INITS_ARGV 0 ) -set( EC_HAVE_PROCFS 1 ) -set( EC_HAVE_DLFCN_H 1 ) -set( EC_HAVE_DLADDR 1 ) -set( EC_HAVE_AIOCB 1 ) -set( EC_HAVE_AIOCB64 1 ) - -# Disable relative rpaths as aprun does not respect it -set( ENABLE_RELATIVE_RPATHS OFF CACHE STRING "Disable relative rpaths" FORCE ) #################################################################### # COMPILER From b6be083a05ea68dd9d219be4053929cc9b6d75df Mon Sep 17 00:00:00 2001 From: Michael Staneker <50531288+MichaelSt98@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:05:10 +0100 Subject: [PATCH 135/174] remove irrelevant ECBUILD intel-sycl flags/switches Co-authored-by: Balthasar Reuter <6384870+reuterbal@users.noreply.github.com> --- arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake index 7574e5e0..9f581bcc 100644 --- a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake +++ b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake @@ -7,12 +7,6 @@ # nor does it submit to any jurisdiction. -#################################################################### -# COMPILER -#################################################################### - -set( ECBUILD_FIND_MPI ON ) -set( ECBUILD_TRUST_FLAGS ON ) #################################################################### # Compiler FLAGS From 04ed5bfa1e9db689cc209dafea15fba3a61dd216 Mon Sep 17 00:00:00 2001 From: Michael Staneker <50531288+MichaelSt98@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:06:06 +0100 Subject: [PATCH 136/174] Remove irrelevant ECBUILD LINKER flags Co-authored-by: Balthasar Reuter <6384870+reuterbal@users.noreply.github.com> --- arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake index 9f581bcc..bd95f5a0 100644 --- a/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake +++ b/arch/toolchains/ecmwf-hpc2020-intel-sycl.cmake @@ -46,11 +46,3 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xsycl-target-backend --cuda-gpu-arch=sm_80") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/apps/intel/2021.4.0/compiler/2021.4.0/linux/compiler/include") -#################################################################### -# LINK FLAGS -#################################################################### - -set( ECBUILD_SHARED_LINKER_FLAGS "-Wl,--eh-frame-hdr " ) -set( ECBUILD_MODULE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap" ) -set( ECBUILD_EXE_LINKER_FLAGS "-Wl,--eh-frame-hdr -Wl,-Map,loadmap -Wl,--as-needed" ) -set( ECBUILD_CXX_IMPLICIT_LINK_LIBRARIES "${LIBCRAY_CXX_RTS}" CACHE STRING "" ) From 3deee2c532ce5397929113ba51ffbfff0585c018 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Wed, 31 Jan 2024 08:34:09 +0000 Subject: [PATCH 137/174] Loki F2C transpilation, use 'loki_transform' instead of 'loki_transform_transpile' --- src/cloudsc_loki/CMakeLists.txt | 45 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index c7b2c60e..af226e76 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -658,28 +658,29 @@ endif() if ( NOT HAVE_SINGLE_PRECISION ) cloudsc_xmod( loki-c ) - - loki_transform_transpile( - FRONTEND ${LOKI_FRONTEND} CPP - HEADERS - ${COMMON_MODULE}/parkind1.F90 - ${COMMON_MODULE}/yomphyder.F90 - ${COMMON_MODULE}/yomcst.F90 - ${COMMON_MODULE}/yoethf.F90 - ${COMMON_MODULE}/yoecldp.F90 - ${COMMON_MODULE}/fcttre_mod.F90 - ${COMMON_MODULE}/fccld_mod.F90 - DRIVER ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_driver_loki_mod.F90 - SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc.F90 - INCLUDES ${COMMON_INCLUDE} - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-c - OUTPUT - loki-c/cloudsc_driver_loki_mod.c.F90 - loki-c/cloudsc_fc.F90 loki-c/cloudsc_c.c - loki-c/yoethf_fc.F90 loki-c/yomcst_fc.F90 - loki-c/yoecldp_fc.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + + loki_transform( + COMMAND convert + MODE c FRONTEND ${LOKI_FRONTEND} CPP + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS + ${COMMON_MODULE}/parkind1.F90 + ${COMMON_MODULE}/yomphyder.F90 + ${COMMON_MODULE}/yomcst.F90 + ${COMMON_MODULE}/yoethf.F90 + ${COMMON_MODULE}/yoecldp.F90 + ${COMMON_MODULE}/fcttre_mod.F90 + ${COMMON_MODULE}/fccld_mod.F90 + INCLUDES ${COMMON_INCLUDE} + XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-c + OUTPUT + loki-c/cloudsc_driver_loki_mod.c.F90 + loki-c/cloudsc_fc.F90 loki-c/cloudsc_c.c + loki-c/yoethf_fc.F90 loki-c/yomcst_fc.F90 + loki-c/yoecldp_fc.F90 + DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} ) # Define the CLAW-CPU build target for this variant From bca971f7532cc0f65baf65f8b470430a33559f49 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 6 Sep 2023 15:39:42 +0000 Subject: [PATCH 138/174] Regression tested without packed storage on new FIELD_API --- bundle.yml | 13 +- .../cloudsc_driver_gpu_scc_field_mod.F90 | 58 ++- src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 | 3 + src/common/CMakeLists.txt | 2 +- src/common/module/cloudsc_field_state_mod.F90 | 432 +++++++++--------- src/common/module/yomphyder.F90 | 6 +- 6 files changed, 278 insertions(+), 236 deletions(-) diff --git a/bundle.yml b/bundle.yml index f0340013..ac985b09 100644 --- a/bundle.yml +++ b/bundle.yml @@ -69,14 +69,13 @@ projects : ATLAS_ENABLE_CUDA=OFF - field_api : - git : ${BITBUCKET}/rdx/field_api - version : master + git : https://github.com/ecmwf-ifs/field_api.git + version : main optional: true require : ecbuild cmake : > - ENABLE_FIELD_API_TESTS=OFF - ENABLE_FIELD_API_FIAT_BUILD=OFF - FIELD_API_UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module + ENABLE_SINGLE_PRECISION=OFF + UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module - cloudsc-dwarf : # The CLOUDSC dwarf project with multiple implementations @@ -92,7 +91,9 @@ options : - single-precision : help : Enable single precision build of the dwarf - cmake : ENABLE_SINGLE_PRECISION=ON + cmake : > + ENABLE_SINGLE_PRECISION=ON + FIELD_API_DEFINITIONS=SINGLE - with-gpu : help : Enable GPU kernels diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 index 503f2ed7..bf7b4ab5 100644 --- a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 +++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 @@ -156,7 +156,21 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & CALL FIELD_STATE%TENDENCY_LOC%F_Q%GET_DEVICE_DATA_RDWR(TEND_LOC_Q) CALL FIELD_STATE%TENDENCY_LOC%F_A%GET_DEVICE_DATA_RDWR(TEND_LOC_A) CALL FIELD_STATE%TENDENCY_LOC%F_CLD%GET_DEVICE_DATA_RDWR(TEND_LOC_CLD) -!$acc data copyin(yrecldp) +!$acc data copyin(yrecldp) deviceptr(PT, PQ,TEND_TMP_T,TEND_TMP_Q,& +!$acc & TEND_TMP_A, TEND_TMP_CLD, TEND_LOC_T, TEND_LOC_Q, & +!$acc & TEND_LOC_A, TEND_LOC_CLD, PVFA, PVFL, PVFI, & +!$acc & PDYNA, PDYNL, PDYNI, PHRSW, PHRLW,& +!$acc & PVERVEL, PAP, PAPH,& +!$acc & PLSM, LDCUM, KTYPE, & +!$acc & PLU, PLUDE, PSNDE, PMFU, PMFD,& +!$acc & PA, PCLV, PSUPSAT,& +!$acc & PLCRIT_AER,PICRIT_AER,& +!$acc & PRE_ICE, PCCN, PNICE,& +!$acc & PCOVPTOT, PRAINFRAC_TOPRFZ,& +!$acc & PFSQLF, PFSQIF , PFCQNNG, PFCQLNG,& +!$acc & PFSQRF, PFSQSF , PFCQRNG, PFCQSNG,& +!$acc & PFSQLTUR, PFSQITUR , & +!$acc & PFPLSL, PFPLSN, PFHPSL, PFHPSN) ! Local timer for each thread TID = GET_THREAD_NUM() @@ -198,27 +212,27 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & CALL TIMER%THREAD_END(TID) - CALL FIELD_STATE%F_PLUDE%ENSURE_HOST() - CALL FIELD_STATE%F_PCOVPTOT%ENSURE_HOST() - CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%ENSURE_HOST() - CALL FIELD_STATE%F_PFSQLF%ENSURE_HOST() - CALL FIELD_STATE%F_PFSQIF%ENSURE_HOST() - CALL FIELD_STATE%F_PFCQLNG%ENSURE_HOST() - CALL FIELD_STATE%F_PFCQNNG%ENSURE_HOST() - CALL FIELD_STATE%F_PFSQRF%ENSURE_HOST() - CALL FIELD_STATE%F_PFSQSF%ENSURE_HOST() - CALL FIELD_STATE%F_PFCQRNG%ENSURE_HOST() - CALL FIELD_STATE%F_PFCQSNG%ENSURE_HOST() - CALL FIELD_STATE%F_PFSQLTUR%ENSURE_HOST() - CALL FIELD_STATE%F_PFSQITUR%ENSURE_HOST() - CALL FIELD_STATE%F_PFPLSL%ENSURE_HOST() - CALL FIELD_STATE%F_PFPLSN%ENSURE_HOST() - CALL FIELD_STATE%F_PFHPSL%ENSURE_HOST() - CALL FIELD_STATE%F_PFHPSN%ENSURE_HOST() - CALL FIELD_STATE%TENDENCY_LOC%F_T%ENSURE_HOST() - CALL FIELD_STATE%TENDENCY_LOC%F_Q%ENSURE_HOST() - CALL FIELD_STATE%TENDENCY_LOC%F_A%ENSURE_HOST() - CALL FIELD_STATE%TENDENCY_LOC%F_CLD%ENSURE_HOST() + CALL FIELD_STATE%F_PLUDE%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PCOVPTOT%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFSQLF%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFSQIF%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFCQLNG%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFCQNNG%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFSQRF%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFSQSF%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFCQRNG%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFCQSNG%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFSQLTUR%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFSQITUR%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFPLSL%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFPLSN%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFHPSL%SYNC_HOST_RDWR() + CALL FIELD_STATE%F_PFHPSN%SYNC_HOST_RDWR() + CALL FIELD_STATE%TENDENCY_LOC%F_T%SYNC_HOST_RDWR() + CALL FIELD_STATE%TENDENCY_LOC%F_Q%SYNC_HOST_RDWR() + CALL FIELD_STATE%TENDENCY_LOC%F_A%SYNC_HOST_RDWR() + CALL FIELD_STATE%TENDENCY_LOC%F_CLD%SYNC_HOST_RDWR() CALL TIMER%END() diff --git a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 index fbc5e321..04f085fe 100644 --- a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 +++ b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 @@ -292,6 +292,9 @@ PROGRAM DWARF_CLOUDSC ! Validate the output against serialized reference data CALL GLOBAL_STATE%VALIDATE(NPROMA, NGPTOT, NGPTOTG) +#ifdef CLOUDSC_GPU_SCC_FIELD +CALL GLOBAL_STATE%FINALIZE(USE_PACKED) +#endif ! Tear down MPI environment CALL CLOUDSC_MPI_END() diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index fd571521..f5c11dc0 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -94,7 +94,7 @@ ecbuild_add_library( TARGET cloudsc-common-lib $<${HAVE_MPI}:MPI::MPI_Fortran> $<${HAVE_HDF5}:hdf5::hdf5_fortran> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_Fortran> - $<${HAVE_FIELD_API}:field_api> + $<${HAVE_FIELD_API}:field_api_dp> # FIELD_API uses parkind1 from CLOUDSC, so both dp and sp variants are identical ) if( HAVE_CUDA ) diff --git a/src/common/module/cloudsc_field_state_mod.F90 b/src/common/module/cloudsc_field_state_mod.F90 index 345dcccb..599f00d2 100644 --- a/src/common/module/cloudsc_field_state_mod.F90 +++ b/src/common/module/cloudsc_field_state_mod.F90 @@ -20,7 +20,8 @@ MODULE CLOUDSC_FIELD_STATE_MOD USE EXPAND_MOD, ONLY: EXPAND, LOAD_AND_EXPAND, LOAD_AND_EXPAND_STATE, GET_OFFSETS USE VALIDATE_MOD, ONLY: VALIDATE USE CLOUDSC_MPI_MOD, ONLY: IRANK - USE FIELD_MODULE, ONLY: FIELD_2D, FIELD_2D_OWNER, FIELD_3D, FIELD_3D_WRAPPER_PACKED, FIELD_3D_OWNER, FIELD_4D, FIELD_4D_OWNER, FIELD_INT2D, FIELD_INT2D_OWNER, FIELD_LOG2D, FIELD_LOG2D_OWNER, MALLOC_HOST_PINNED_4D + USE FIELD_MODULE, ONLY: FIELD_2RB, FIELD_3RB, FIELD_4RB, FIELD_2IM, FIELD_2LM + USE FIELD_FACTORY_MODULE, ONLY: FIELD_NEW, FIELD_DELETE IMPLICIT NONE @@ -42,18 +43,19 @@ MODULE CLOUDSC_FIELD_STATE_MOD REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA_RWONLY(:,:,:,:) ! Storage fields to provide thread-local views - CLASS(FIELD_2D), POINTER :: F_PRAINFRAC_TOPRFZ, F_PLSM - CLASS(FIELD_INT2D), POINTER :: F_KTYPE - CLASS(FIELD_LOG2D), POINTER :: F_LDCUM - CLASS(FIELD_3D), POINTER :: F_PLCRIT_AER, F_PICRIT_AER, F_PRE_ICE, F_PCCN, F_PNICE, F_PT, F_PQ, & + CLASS(FIELD_2RB), POINTER :: F_PRAINFRAC_TOPRFZ, F_PLSM + CLASS(FIELD_2IM), POINTER :: F_KTYPE + CLASS(FIELD_2LM), POINTER :: F_LDCUM + CLASS(FIELD_3RB), POINTER :: F_PLCRIT_AER, F_PICRIT_AER, F_PRE_ICE, F_PCCN, F_PNICE, F_PT, F_PQ, & & F_PVFA, F_PVFL, F_PVFI, F_PDYNA, F_PDYNL, F_PDYNI, F_PHRSW, F_PHRLW, F_PVERVEL, F_PAP, F_PAPH, & & F_PLU, F_PLUDE, F_PSNDE, F_PMFU, F_PMFD, F_PA, F_PSUPSAT, F_PCOVPTOT - CLASS(FIELD_3D), POINTER :: F_PFSQLF, F_PFSQIF, F_PFCQLNG, F_PFCQNNG, F_PFSQRF, F_PFSQSF, F_PFCQRNG, & + CLASS(FIELD_3RB), POINTER :: F_PFSQLF, F_PFSQIF, F_PFCQLNG, F_PFCQNNG, F_PFSQRF, F_PFSQSF, F_PFCQRNG, & & F_PFCQSNG, F_PFSQLTUR, F_PFSQITUR, F_PFPLSL, F_PFPLSN, F_PFHPSL, F_PFHPSN - CLASS(FIELD_4D), POINTER :: F_PCLV + CLASS(FIELD_4RB), POINTER :: F_PCLV CONTAINS PROCEDURE :: LOAD => CLOUDSC_FIELD_STATE_LOAD PROCEDURE :: VALIDATE => CLOUDSC_FIELD_STATE_VALIDATE + PROCEDURE :: FINALIZE => CLOUDSC_FIELD_STATE_FINALIZE END TYPE CLOUDSC_FIELD_STATE INTERFACE FIELD_INIT @@ -63,55 +65,18 @@ MODULE CLOUDSC_FIELD_STATE_MOD CONTAINS - FUNCTION CREATE_FIELD_ALLOCATE_INT2D(SHAPE, NBLOCKS) RESULT(FIELD_PTR) - CLASS(FIELD_INT2D_OWNER), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1), NBLOCKS - INTEGER(KIND=JPIM) :: B - INTEGER(KIND=JPIM) :: LBOUNDS(2),UBOUNDS(2) - - LBOUNDS(1) = 1 - LBOUNDS(2) = 1 - UBOUNDS(1) = SHAPE(1) - UBOUNDS(2) = NBLOCKS - - ALLOCATE(FIELD_PTR) - CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) - END FUNCTION CREATE_FIELD_ALLOCATE_INT2D - - FUNCTION CREATE_FIELD_ALLOCATE_LOG2D(SHAPE, NBLOCKS) RESULT(FIELD_PTR) - CLASS(FIELD_LOG2D_OWNER), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1), NBLOCKS - INTEGER(KIND=JPIM) :: B - INTEGER(KIND=JPIM) :: LBOUNDS(2),UBOUNDS(2) - - LBOUNDS(1) = 1 - LBOUNDS(2) = 1 - UBOUNDS(1) = SHAPE(1) - UBOUNDS(2) = NBLOCKS - - ALLOCATE(FIELD_PTR) - CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) - END FUNCTION CREATE_FIELD_ALLOCATE_LOG2D - - FUNCTION CREATE_FIELD_ALLOCATE_2D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) - CLASS(FIELD_2D_OWNER), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(1), NBLOCKS + FUNCTION CREATE_FIELD_ALLOCATE_2D(SHAPE, ZERO) RESULT(FIELD_PTR) + CLASS(FIELD_2RB), POINTER :: FIELD_PTR + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) LOGICAL, OPTIONAL, INTENT(IN) :: ZERO INTEGER(KIND=JPIM) :: B - INTEGER(KIND=JPIM) :: LBOUNDS(2), UBOUNDS(2) - LBOUNDS(1) = 1 - LBOUNDS(2) = 1 - UBOUNDS(1) = SHAPE(1) - UBOUNDS(2) = NBLOCKS - - ALLOCATE(FIELD_PTR) - CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) + CALL FIELD_NEW(FIELD_PTR, UBOUNDS=SHAPE, PERSISTENT=.TRUE.) IF (PRESENT(ZERO)) THEN IF (ZERO) THEN !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) SCHEDULE(RUNTIME) - DO B=1, NBLOCKS + DO B=1, SHAPE(2) FIELD_PTR%PTR(:,B) = 0.0_JPRB END DO !$OMP END PARALLEL DO @@ -119,27 +84,18 @@ FUNCTION CREATE_FIELD_ALLOCATE_2D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END IF END FUNCTION CREATE_FIELD_ALLOCATE_2D - FUNCTION CREATE_FIELD_ALLOCATE_3D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) - CLASS(FIELD_3D_OWNER), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2), NBLOCKS + FUNCTION CREATE_FIELD_ALLOCATE_3D(SHAPE, ZERO) RESULT(FIELD_PTR) + CLASS(FIELD_3RB), POINTER :: FIELD_PTR + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) LOGICAL, OPTIONAL, INTENT(IN) :: ZERO INTEGER(KIND=JPIM) :: B - INTEGER(KIND=JPIM) :: LBOUNDS(3), UBOUNDS(3) - - LBOUNDS(1) = 1 - LBOUNDS(2) = 1 - LBOUNDS(3) = 1 - UBOUNDS(1) = SHAPE(1) - UBOUNDS(2) = SHAPE(2) - UBOUNDS(3) = NBLOCKS - ALLOCATE(FIELD_PTR) - CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) + CALL FIELD_NEW(FIELD_PTR, UBOUNDS=SHAPE, PERSISTENT=.TRUE.) IF (PRESENT(ZERO)) THEN IF (ZERO) THEN !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) SCHEDULE(RUNTIME) - DO B=1, NBLOCKS + DO B=1, SHAPE(3) FIELD_PTR%PTR(:,:,B) = 0.0_JPRB END DO !$OMP END PARALLEL DO @@ -147,29 +103,18 @@ FUNCTION CREATE_FIELD_ALLOCATE_3D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END IF END FUNCTION CREATE_FIELD_ALLOCATE_3D - FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) - CLASS(FIELD_4D_OWNER), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3), NBLOCKS + FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, ZERO) RESULT(FIELD_PTR) + CLASS(FIELD_4RB), POINTER :: FIELD_PTR + INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(4) LOGICAL, OPTIONAL, INTENT(IN) :: ZERO INTEGER(KIND=JPIM) :: B - INTEGER(KIND=JPIM) :: LBOUNDS(4), UBOUNDS(4) - LBOUNDS(1) = 1 - LBOUNDS(2) = 1 - LBOUNDS(3) = 1 - LBOUNDS(4) = 1 - UBOUNDS(1) = SHAPE(1) - UBOUNDS(2) = SHAPE(2) - UBOUNDS(3) = SHAPE(3) - UBOUNDS(4) = NBLOCKS - - ALLOCATE(FIELD_PTR) - CALL FIELD_PTR%INIT(LBOUNDS, UBOUNDS, PERSISTENT=.TRUE.) + CALL FIELD_NEW(FIELD_PTR, UBOUNDS=SHAPE, PERSISTENT=.TRUE.) IF (PRESENT(ZERO)) THEN IF (ZERO) THEN !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) SCHEDULE(RUNTIME) - DO B=1, NBLOCKS + DO B=1, SHAPE(4) FIELD_PTR%PTR(:,:,:,B) = 0.0_JPRB END DO !$OMP END PARALLEL DO @@ -177,19 +122,19 @@ FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, NBLOCKS, ZERO) RESULT(FIELD_PTR) END IF END FUNCTION CREATE_FIELD_ALLOCATE_4D - FUNCTION CREATE_FIELD_WRAP_PACKED_3D(DATA, IDX) RESULT(FIELD_PTR) - ! Create a single 3D field with implicit blocking dimension by wrapping existing data - CLASS(FIELD_3D_WRAPPER_PACKED), POINTER :: FIELD_PTR - REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) - INTEGER(KIND=JPIM), INTENT(IN) :: IDX - - ALLOCATE(FIELD_PTR) - CALL FIELD_PTR%INIT(DATA=DATA, IDX=IDX) - END FUNCTION CREATE_FIELD_WRAP_PACKED_3D +! FUNCTION CREATE_FIELD_WRAP_PACKED_3D(DATA, IDX) RESULT(FIELD_PTR) +! ! Create a single 3D field with implicit blocking dimension by wrapping existing data +! CLASS(FIELD_3D_WRAPPER_PACKED), POINTER :: FIELD_PTR +! REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) +! INTEGER(KIND=JPIM), INTENT(IN) :: IDX +! +! ALLOCATE(FIELD_PTR) +! CALL FIELD_PTR%INIT(DATA=DATA, IDX=IDX) +! END FUNCTION CREATE_FIELD_WRAP_PACKED_3D SUBROUTINE LOAD_AND_EXPAND_FIELD_2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - CLASS(FIELD_2D), INTENT(INOUT) :: FIELD + CLASS(FIELD_2RB), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:) @@ -204,7 +149,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_2D SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - CLASS(FIELD_INT2D), INTENT(INOUT) :: FIELD + CLASS(FIELD_2IM), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG INTEGER(KIND=JPIM), ALLOCATABLE :: BUFFER(:) @@ -219,7 +164,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - CLASS(FIELD_LOG2D), INTENT(INOUT) :: FIELD + CLASS(FIELD_2LM), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG LOGICAL(KIND=JPLM), ALLOCATABLE :: BUFFER(:) @@ -234,7 +179,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D SUBROUTINE LOAD_AND_EXPAND_FIELD_3D(NAME, FIELD, NLON, NLEV, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - CLASS(FIELD_3D), INTENT(INOUT) :: FIELD + CLASS(FIELD_3RB), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NLEV, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:,:) @@ -249,7 +194,7 @@ END SUBROUTINE LOAD_AND_EXPAND_FIELD_3D SUBROUTINE LOAD_AND_EXPAND_FIELD_4D(NAME, FIELD, NLON, NLEV, NDIM, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME - CLASS(FIELD_4D), INTENT(INOUT) :: FIELD + CLASS(FIELD_4RB), INTENT(INOUT) :: FIELD INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NLEV,NDIM, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:,:,:) @@ -329,6 +274,7 @@ SUBROUTINE FIELD_INIT_STATE(STATE, BUFFER, NPROMA, NLEV, NDIM, NBLOCKS) END SUBROUTINE FIELD_INIT_STATE SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) + USE FIELD_DEFAULTS_MODULE, ONLY: INIT_PINNED_VALUE, INIT_MAP_DEVPTR ! Load reference input data via serialbox CLASS(CLOUDSC_FIELD_STATE) :: SELF INTEGER(KIND=JPIM), INTENT(IN) :: NPROMA, NGPTOT @@ -344,6 +290,11 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) LLPACKED = .FALSE. IF (PRESENT(USE_PACKED)) LLPACKED = USE_PACKED + ! Set this flag to enable pinning of fields in page-locked memory + INIT_PINNED_VALUE = .TRUE. + ! Set this flag to disable host-mapped device pointers + INIT_MAP_DEVPTR = .FALSE. + CALL INPUT_INITIALIZE(NAME='input') SELF%NBLOCKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1) @@ -354,128 +305,131 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL GET_OFFSETS(START, END, SIZE, KLON, SELF%KLEV, NCLV, NGPTOT, NGPTOTG) IF (LLPACKED) THEN - ! Allocate bulk buffers for read-only input 3D fields - NFIELDS = 24 - ! ALLOCATE(SELF%DATA_RDONLY(NPROMA, SELF%KLEV, NFIELDS, SELF%NBLOCKS)) - SELF%DATA_RDONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV, NFIELDS], SELF%NBLOCKS) - - SELF%F_PT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=1) - SELF%F_PQ => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=2) - SELF%F_PVFA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=3) - SELF%F_PVFL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=4) - SELF%F_PVFI => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=5) - SELF%F_PDYNA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=6) - SELF%F_PDYNL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=7) - SELF%F_PDYNI => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=8) - SELF%F_PHRSW => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=9) - SELF%F_PHRLW => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=10) - SELF%F_PVERVEL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=11) - SELF%F_PAP => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=12) - SELF%F_PLU => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=13) - SELF%F_PLUDE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=14) - SELF%F_PSNDE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=15) - SELF%F_PMFU => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=16) - SELF%F_PMFD => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=17) - SELF%F_PA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=18) - SELF%F_PSUPSAT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=19) - SELF%F_PLCRIT_AER => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=20) - SELF%F_PICRIT_AER => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=21) - SELF%F_PRE_ICE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=22) - SELF%F_PCCN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=23) - SELF%F_PNICE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=24) + PRINT *, "Packed storage option not yet enabled" + ERROR STOP - ! Custom fields that do not share shape or data type with the other blocks - SELF%F_PAPH => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS) - SELF%F_PLSM => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) - SELF%F_LDCUM => CREATE_FIELD_ALLOCATE_LOG2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) - SELF%F_KTYPE => CREATE_FIELD_ALLOCATE_INT2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) - SELF%F_PCLV => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PCOVPTOT => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PRAINFRAC_TOPRFZ => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - - ! Allocate bulk buffers for output 3D fields - NFIELDS = 14 - ! CALL FIELD_INIT(SELF%DATA_RWONLY, NPROMA, SELF%KLEV+1, NFIELDS, SELF%NBLOCKS) - SELF%DATA_RWONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV+1, NFIELDS], SELF%NBLOCKS) - SELF%DATA_RWONLY(:,:,:,:) = 0.0_JPRB - - SELF%F_PFSQLF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=1) - SELF%F_PFSQIF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=2) - SELF%F_PFCQLNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=3) - SELF%F_PFCQNNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=4) - SELF%F_PFSQRF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=5) - SELF%F_PFSQSF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=6) - SELF%F_PFCQRNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=7) - SELF%F_PFCQSNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=8) - SELF%F_PFSQLTUR => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=9) - SELF%F_PFSQITUR => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=10) - SELF%F_PFPLSL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=11) - SELF%F_PFPLSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=12) - SELF%F_PFHPSL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=13) - SELF%F_PFHPSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=14) +! ! Allocate bulk buffers for read-only input 3D fields +! NFIELDS = 24 +! ! ALLOCATE(SELF%DATA_RDONLY(NPROMA, SELF%KLEV, NFIELDS, SELF%NBLOCKS)) +! SELF%DATA_RDONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV, NFIELDS], SELF%NBLOCKS) +! +! SELF%F_PT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=1) +! SELF%F_PQ => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=2) +! SELF%F_PVFA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=3) +! SELF%F_PVFL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=4) +! SELF%F_PVFI => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=5) +! SELF%F_PDYNA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=6) +! SELF%F_PDYNL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=7) +! SELF%F_PDYNI => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=8) +! SELF%F_PHRSW => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=9) +! SELF%F_PHRLW => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=10) +! SELF%F_PVERVEL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=11) +! SELF%F_PAP => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=12) +! SELF%F_PLU => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=13) +! SELF%F_PLUDE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=14) +! SELF%F_PSNDE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=15) +! SELF%F_PMFU => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=16) +! SELF%F_PMFD => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=17) +! SELF%F_PA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=18) +! SELF%F_PSUPSAT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=19) +! SELF%F_PLCRIT_AER => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=20) +! SELF%F_PICRIT_AER => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=21) +! SELF%F_PRE_ICE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=22) +! SELF%F_PCCN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=23) +! SELF%F_PNICE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=24) +! +! ! Custom fields that do not share shape or data type with the other blocks +! SELF%F_PAPH => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS) +! SELF%F_PLSM => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) +! SELF%F_LDCUM => CREATE_FIELD_ALLOCATE_LOG2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) +! SELF%F_KTYPE => CREATE_FIELD_ALLOCATE_INT2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) +! SELF%F_PCLV => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV], NBLOCKS=SELF%NBLOCKS) +! SELF%F_PCOVPTOT => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) +! SELF%F_PRAINFRAC_TOPRFZ => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) +! +! ! Allocate bulk buffers for output 3D fields +! NFIELDS = 14 +! ! CALL FIELD_INIT(SELF%DATA_RWONLY, NPROMA, SELF%KLEV+1, NFIELDS, SELF%NBLOCKS) +! SELF%DATA_RWONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV+1, NFIELDS], SELF%NBLOCKS) +! SELF%DATA_RWONLY(:,:,:,:) = 0.0_JPRB +! +! SELF%F_PFSQLF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=1) +! SELF%F_PFSQIF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=2) +! SELF%F_PFCQLNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=3) +! SELF%F_PFCQNNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=4) +! SELF%F_PFSQRF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=5) +! SELF%F_PFSQSF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=6) +! SELF%F_PFCQRNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=7) +! SELF%F_PFCQSNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=8) +! SELF%F_PFSQLTUR => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=9) +! SELF%F_PFSQITUR => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=10) +! SELF%F_PFPLSL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=11) +! SELF%F_PFPLSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=12) +! SELF%F_PFHPSL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=13) +! SELF%F_PFHPSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=14) ELSE - SELF%F_PT => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PQ => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PVFA => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PVFL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PVFI => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PDYNA => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PDYNL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PDYNI => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PHRSW => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PHRLW => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PVERVEL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PAP => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PLU => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PLUDE => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PSNDE => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PMFU => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PMFD => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PA => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PSUPSAT => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PLCRIT_AER => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PICRIT_AER => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PRE_ICE => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PCCN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PNICE => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) + CALL FIELD_NEW(SELF%F_PT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PQ, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PVFA, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PVFL, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PVFI, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PDYNA, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PDYNL, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PDYNI, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PHRSW, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PHRLW, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PVERVEL, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PAP, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PLU, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PLUDE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PSNDE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PMFU, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PMFD, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PA, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PSUPSAT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PLCRIT_AER, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PICRIT_AER, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PRE_ICE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PCCN, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PNICE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) ! Custom fields that do not share shape or data type with the other blocks - SELF%F_PAPH => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS) - SELF%F_PLSM => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) - SELF%F_LDCUM => CREATE_FIELD_ALLOCATE_LOG2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) - SELF%F_KTYPE => CREATE_FIELD_ALLOCATE_INT2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) - SELF%F_PCLV => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PCOVPTOT => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%F_PRAINFRAC_TOPRFZ => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) + CALL FIELD_NEW(SELF%F_PAPH, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PLSM, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_LDCUM, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_KTYPE, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PCLV, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PCOVPTOT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + SELF%F_PRAINFRAC_TOPRFZ => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA,SELF%NBLOCKS], ZERO=.TRUE.) ! Allocate bulk buffers for output 3D fields - SELF%F_PFSQLF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFSQIF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFCQLNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFCQNNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFSQRF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFSQSF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFCQRNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFCQSNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFSQLTUR => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFSQITUR => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFPLSL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFPLSN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFHPSL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%F_PFHPSN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) + SELF%F_PFSQLF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFSQIF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFCQLNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFCQNNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFSQRF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFSQSF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFCQRNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFCQSNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFSQLTUR => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFSQITUR => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFPLSL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFPLSN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFHPSL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%F_PFHPSN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) END IF ! TODO: For now we treat all fields as single-allocations - SELF%TENDENCY_LOC%F_T => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%TENDENCY_LOC%F_A => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%TENDENCY_LOC%F_Q => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) - SELF%TENDENCY_LOC%F_CLD => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) + SELF%TENDENCY_LOC%F_T => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%TENDENCY_LOC%F_A => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%TENDENCY_LOC%F_Q => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV,SELF%NBLOCKS], ZERO=.TRUE.) + SELF%TENDENCY_LOC%F_CLD => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%TENDENCY_TMP%F_T => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%TENDENCY_TMP%F_A => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%TENDENCY_TMP%F_Q => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) - SELF%TENDENCY_TMP%F_CLD => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV], NBLOCKS=SELF%NBLOCKS) + CALL FIELD_NEW(SELF%TENDENCY_TMP%F_T, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%TENDENCY_TMP%F_A, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%TENDENCY_TMP%F_Q, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%TENDENCY_TMP%F_CLD, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL LOAD_AND_EXPAND_FIELD_3D('PLCRIT_AER', SELF%F_PLCRIT_AER, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('PICRIT_AER', SELF%F_PICRIT_AER, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) @@ -614,4 +568,74 @@ SUBROUTINE CLOUDSC_FIELD_STATE_VALIDATE(SELF, NPROMA, NGPTOT, NGPTOTG) END SUBROUTINE CLOUDSC_FIELD_STATE_VALIDATE + SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE(SELF, USE_PACKED) + ! Validate the correctness of output against reference data + CLASS(CLOUDSC_FIELD_STATE) :: SELF + ! Use this toggle to switch between standalone fields and bulk-allocated ones. + LOGICAL, INTENT(IN), OPTIONAL :: USE_PACKED + + IF(USE_PACKED)THEN + + ELSE + CALL FIELD_DELETE(SELF%F_PT) + CALL FIELD_DELETE(SELF%F_PQ) + CALL FIELD_DELETE(SELF%F_PVFA) + CALL FIELD_DELETE(SELF%F_PVFL) + CALL FIELD_DELETE(SELF%F_PVFI) + CALL FIELD_DELETE(SELF%F_PDYNA) + CALL FIELD_DELETE(SELF%F_PDYNL) + CALL FIELD_DELETE(SELF%F_PDYNI) + CALL FIELD_DELETE(SELF%F_PHRSW) + CALL FIELD_DELETE(SELF%F_PHRLW) + CALL FIELD_DELETE(SELF%F_PVERVEL) + CALL FIELD_DELETE(SELF%F_PAP) + CALL FIELD_DELETE(SELF%F_PLU) + CALL FIELD_DELETE(SELF%F_PLUDE) + CALL FIELD_DELETE(SELF%F_PSNDE) + CALL FIELD_DELETE(SELF%F_PMFU) + CALL FIELD_DELETE(SELF%F_PMFD) + CALL FIELD_DELETE(SELF%F_PA) + CALL FIELD_DELETE(SELF%F_PSUPSAT) + CALL FIELD_DELETE(SELF%F_PLCRIT_AER) + CALL FIELD_DELETE(SELF%F_PICRIT_AER) + CALL FIELD_DELETE(SELF%F_PRE_ICE) + CALL FIELD_DELETE(SELF%F_PCCN) + CALL FIELD_DELETE(SELF%F_PNICE) + + CALL FIELD_DELETE(SELF%F_PAPH) + CALL FIELD_DELETE(SELF%F_PLSM) + CALL FIELD_DELETE(SELF%F_LDCUM) + CALL FIELD_DELETE(SELF%F_KTYPE) + CALL FIELD_DELETE(SELF%F_PCLV) + CALL FIELD_DELETE(SELF%F_PCOVPTOT) + CALL FIELD_DELETE(SELF%F_PRAINFRAC_TOPRFZ) + + CALL FIELD_DELETE(SELF%F_PFSQLF) + CALL FIELD_DELETE(SELF%F_PFSQIF) + CALL FIELD_DELETE(SELF%F_PFCQLNG) + CALL FIELD_DELETE(SELF%F_PFCQNNG) + CALL FIELD_DELETE(SELF%F_PFSQRF) + CALL FIELD_DELETE(SELF%F_PFSQSF) + CALL FIELD_DELETE(SELF%F_PFCQRNG) + CALL FIELD_DELETE(SELF%F_PFCQSNG) + CALL FIELD_DELETE(SELF%F_PFSQLTUR) + CALL FIELD_DELETE(SELF%F_PFSQITUR) + CALL FIELD_DELETE(SELF%F_PFPLSL) + CALL FIELD_DELETE(SELF%F_PFPLSN) + CALL FIELD_DELETE(SELF%F_PFHPSL) + CALL FIELD_DELETE(SELF%F_PFHPSN) + + CALL FIELD_DELETE(SELF%TENDENCY_LOC%F_T) + CALL FIELD_DELETE(SELF%TENDENCY_LOC%F_A) + CALL FIELD_DELETE(SELF%TENDENCY_LOC%F_Q) + CALL FIELD_DELETE(SELF%TENDENCY_LOC%F_CLD) + + CALL FIELD_DELETE(SELF%TENDENCY_TMP%F_T) + CALL FIELD_DELETE(SELF%TENDENCY_TMP%F_A) + CALL FIELD_DELETE(SELF%TENDENCY_TMP%F_Q) + CALL FIELD_DELETE(SELF%TENDENCY_TMP%F_CLD) + ENDIF + + END SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE + END MODULE CLOUDSC_FIELD_STATE_MOD diff --git a/src/common/module/yomphyder.F90 b/src/common/module/yomphyder.F90 index 18158348..62c8c1af 100644 --- a/src/common/module/yomphyder.F90 +++ b/src/common/module/yomphyder.F90 @@ -11,7 +11,7 @@ module yomphyder USE PARKIND1, ONLY : JPIM, JPRB #ifdef USE_FIELD_API -USE FIELD_MODULE, ONLY: FIELD_3D, FIELD_4D +USE FIELD_MODULE, ONLY: FIELD_3RB, FIELD_4RB #endif ! ------------------------------------------------------------------ @@ -37,8 +37,8 @@ module yomphyder !REAL(KIND=JPRB), dimension(:,:), pointer :: qsat ! spec. humidity at saturation #ifdef USE_FIELD_API - CLASS(FIELD_3D), POINTER :: F_T, F_A, F_Q - CLASS(FIELD_4D), POINTER :: F_CLD + CLASS(FIELD_3RB), POINTER :: F_T, F_A, F_Q + CLASS(FIELD_4RB), POINTER :: F_CLD #endif end type state_type From 395793afa9ad63fa0cd4e06ef54dd9d702c28674 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Tue, 12 Sep 2023 14:39:06 +0000 Subject: [PATCH 139/174] Switched to FIELD_XX_PTR containers for RDONLY and RWONLY fields --- .../cloudsc_driver_gpu_scc_field_mod.F90 | 107 +++--- src/common/module/cloudsc_field_state_mod.F90 | 306 +++++++----------- 2 files changed, 171 insertions(+), 242 deletions(-) diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 index bf7b4ab5..59aefbe7 100644 --- a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 +++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 @@ -102,60 +102,61 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & ! Global timer for the parallel region CALL TIMER%START(NUMOMP) - CALL FIELD_STATE%F_PT%GET_DEVICE_DATA_RDONLY(PT) - CALL FIELD_STATE%F_PQ%GET_DEVICE_DATA_RDONLY(PQ) - CALL FIELD_STATE%F_PVFA%GET_DEVICE_DATA_RDONLY(PVFA) - CALL FIELD_STATE%F_PVFL%GET_DEVICE_DATA_RDONLY(PVFL) - CALL FIELD_STATE%F_PVFI%GET_DEVICE_DATA_RDONLY(PVFI) - CALL FIELD_STATE%F_PDYNA%GET_DEVICE_DATA_RDONLY(PDYNA) - CALL FIELD_STATE%F_PDYNL%GET_DEVICE_DATA_RDONLY(PDYNL) - CALL FIELD_STATE%F_PDYNI%GET_DEVICE_DATA_RDONLY(PDYNI) - CALL FIELD_STATE%F_PHRSW%GET_DEVICE_DATA_RDONLY(PHRSW) - CALL FIELD_STATE%F_PHRLW%GET_DEVICE_DATA_RDONLY(PHRLW) - CALL FIELD_STATE%F_PVERVEL%GET_DEVICE_DATA_RDONLY(PVERVEL) - CALL FIELD_STATE%F_PAP%GET_DEVICE_DATA_RDONLY(PAP) + CALL FIELD_STATE%FIELDS_RDONLY(1)%PTR%GET_DEVICE_DATA_RDONLY(PT) + CALL FIELD_STATE%FIELDS_RDONLY(2)%PTR%GET_DEVICE_DATA_RDONLY(PQ) + CALL FIELD_STATE%FIELDS_RDONLY(3)%PTR%GET_DEVICE_DATA_RDONLY(PVFA) + CALL FIELD_STATE%FIELDS_RDONLY(4)%PTR%GET_DEVICE_DATA_RDONLY(PVFL) + CALL FIELD_STATE%FIELDS_RDONLY(5)%PTR%GET_DEVICE_DATA_RDONLY(PVFI) + CALL FIELD_STATE%FIELDS_RDONLY(6)%PTR%GET_DEVICE_DATA_RDONLY(PDYNA) + CALL FIELD_STATE%FIELDS_RDONLY(7)%PTR%GET_DEVICE_DATA_RDONLY(PDYNL) + CALL FIELD_STATE%FIELDS_RDONLY(8)%PTR%GET_DEVICE_DATA_RDONLY(PDYNI) + CALL FIELD_STATE%FIELDS_RDONLY(9)%PTR%GET_DEVICE_DATA_RDONLY(PHRSW) + CALL FIELD_STATE%FIELDS_RDONLY(10)%PTR%GET_DEVICE_DATA_RDONLY(PHRLW) + CALL FIELD_STATE%FIELDS_RDONLY(11)%PTR%GET_DEVICE_DATA_RDONLY(PVERVEL) + CALL FIELD_STATE%FIELDS_RDONLY(12)%PTR%GET_DEVICE_DATA_RDONLY(PAP) CALL FIELD_STATE%F_PAPH%GET_DEVICE_DATA_RDONLY(PAPH) CALL FIELD_STATE%F_PLSM%GET_DEVICE_DATA_RDONLY(PLSM) CALL FIELD_STATE%F_LDCUM%GET_DEVICE_DATA_RDONLY(LDCUM) CALL FIELD_STATE%F_KTYPE%GET_DEVICE_DATA_RDONLY(KTYPE) - CALL FIELD_STATE%F_PLU%GET_DEVICE_DATA_RDONLY(PLU) - CALL FIELD_STATE%F_PSNDE%GET_DEVICE_DATA_RDONLY(PSNDE) - CALL FIELD_STATE%F_PMFU%GET_DEVICE_DATA_RDONLY(PMFU) - CALL FIELD_STATE%F_PMFD%GET_DEVICE_DATA_RDONLY(PMFD) - CALL FIELD_STATE%F_PA%GET_DEVICE_DATA_RDONLY(PA) + CALL FIELD_STATE%FIELDS_RDONLY(13)%PTR%GET_DEVICE_DATA_RDONLY(PLU) + CALL FIELD_STATE%FIELDS_RDONLY(15)%PTR%GET_DEVICE_DATA_RDONLY(PSNDE) + CALL FIELD_STATE%FIELDS_RDONLY(16)%PTR%GET_DEVICE_DATA_RDONLY(PMFU) + CALL FIELD_STATE%FIELDS_RDONLY(17)%PTR%GET_DEVICE_DATA_RDONLY(PMFD) + CALL FIELD_STATE%FIELDS_RDONLY(18)%PTR%GET_DEVICE_DATA_RDONLY(PA) CALL FIELD_STATE%F_PCLV%GET_DEVICE_DATA_RDONLY(PCLV) - CALL FIELD_STATE%F_PSUPSAT%GET_DEVICE_DATA_RDONLY(PSUPSAT) - CALL FIELD_STATE%F_PLCRIT_AER%GET_DEVICE_DATA_RDONLY(PLCRIT_AER) - CALL FIELD_STATE%F_PICRIT_AER%GET_DEVICE_DATA_RDONLY(PICRIT_AER) - CALL FIELD_STATE%F_PRE_ICE%GET_DEVICE_DATA_RDONLY(PRE_ICE) - CALL FIELD_STATE%F_PCCN%GET_DEVICE_DATA_RDONLY(PCCN) - CALL FIELD_STATE%F_PNICE%GET_DEVICE_DATA_RDONLY(PNICE) + CALL FIELD_STATE%FIELDS_RDONLY(19)%PTR%GET_DEVICE_DATA_RDONLY(PSUPSAT) + CALL FIELD_STATE%FIELDS_RDONLY(20)%PTR%GET_DEVICE_DATA_RDONLY(PLCRIT_AER) + CALL FIELD_STATE%FIELDS_RDONLY(21)%PTR%GET_DEVICE_DATA_RDONLY(PICRIT_AER) + CALL FIELD_STATE%FIELDS_RDONLY(22)%PTR%GET_DEVICE_DATA_RDONLY(PRE_ICE) + CALL FIELD_STATE%FIELDS_RDONLY(23)%PTR%GET_DEVICE_DATA_RDONLY(PCCN) + CALL FIELD_STATE%FIELDS_RDONLY(24)%PTR%GET_DEVICE_DATA_RDONLY(PNICE) CALL FIELD_STATE%TENDENCY_TMP%F_T%GET_DEVICE_DATA_RDONLY(TEND_TMP_T) CALL FIELD_STATE%TENDENCY_TMP%F_Q%GET_DEVICE_DATA_RDONLY(TEND_TMP_Q) CALL FIELD_STATE%TENDENCY_TMP%F_A%GET_DEVICE_DATA_RDONLY(TEND_TMP_A) CALL FIELD_STATE%TENDENCY_TMP%F_CLD%GET_DEVICE_DATA_RDONLY(TEND_TMP_CLD) - CALL FIELD_STATE%F_PLUDE%GET_DEVICE_DATA_RDWR(PLUDE) + CALL FIELD_STATE%FIELDS_RDONLY(14)%PTR%GET_DEVICE_DATA_RDWR(PLUDE) CALL FIELD_STATE%F_PCOVPTOT%GET_DEVICE_DATA_RDWR(PCOVPTOT) CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%GET_DEVICE_DATA_RDWR(PRAINFRAC_TOPRFZ) - CALL FIELD_STATE%F_PFSQLF%GET_DEVICE_DATA_RDWR(PFSQLF) - CALL FIELD_STATE%F_PFSQIF%GET_DEVICE_DATA_RDWR(PFSQIF) - CALL FIELD_STATE%F_PFCQLNG%GET_DEVICE_DATA_RDWR(PFCQLNG) - CALL FIELD_STATE%F_PFCQNNG%GET_DEVICE_DATA_RDWR(PFCQNNG) - CALL FIELD_STATE%F_PFSQRF%GET_DEVICE_DATA_RDWR(PFSQRF) - CALL FIELD_STATE%F_PFSQSF%GET_DEVICE_DATA_RDWR(PFSQSF) - CALL FIELD_STATE%F_PFCQRNG%GET_DEVICE_DATA_RDWR(PFCQRNG) - CALL FIELD_STATE%F_PFCQSNG%GET_DEVICE_DATA_RDWR(PFCQSNG) - CALL FIELD_STATE%F_PFSQLTUR%GET_DEVICE_DATA_RDWR(PFSQLTUR) - CALL FIELD_STATE%F_PFSQITUR%GET_DEVICE_DATA_RDWR(PFSQITUR) - CALL FIELD_STATE%F_PFPLSL%GET_DEVICE_DATA_RDWR(PFPLSL) - CALL FIELD_STATE%F_PFPLSN%GET_DEVICE_DATA_RDWR(PFPLSN) - CALL FIELD_STATE%F_PFHPSL%GET_DEVICE_DATA_RDWR(PFHPSL) - CALL FIELD_STATE%F_PFHPSN%GET_DEVICE_DATA_RDWR(PFHPSN) + CALL FIELD_STATE%FIELDS_RWONLY(1)%PTR%GET_DEVICE_DATA_RDWR(PFSQLF) + CALL FIELD_STATE%FIELDS_RWONLY(2)%PTR%GET_DEVICE_DATA_RDWR(PFSQIF) + CALL FIELD_STATE%FIELDS_RWONLY(3)%PTR%GET_DEVICE_DATA_RDWR(PFCQLNG) + CALL FIELD_STATE%FIELDS_RWONLY(4)%PTR%GET_DEVICE_DATA_RDWR(PFCQNNG) + CALL FIELD_STATE%FIELDS_RWONLY(5)%PTR%GET_DEVICE_DATA_RDWR(PFSQRF) + CALL FIELD_STATE%FIELDS_RWONLY(6)%PTR%GET_DEVICE_DATA_RDWR(PFSQSF) + CALL FIELD_STATE%FIELDS_RWONLY(7)%PTR%GET_DEVICE_DATA_RDWR(PFCQRNG) + CALL FIELD_STATE%FIELDS_RWONLY(8)%PTR%GET_DEVICE_DATA_RDWR(PFCQSNG) + CALL FIELD_STATE%FIELDS_RWONLY(9)%PTR%GET_DEVICE_DATA_RDWR(PFSQLTUR) + CALL FIELD_STATE%FIELDS_RWONLY(10)%PTR%GET_DEVICE_DATA_RDWR(PFSQITUR) + CALL FIELD_STATE%FIELDS_RWONLY(11)%PTR%GET_DEVICE_DATA_RDWR(PFPLSL) + CALL FIELD_STATE%FIELDS_RWONLY(12)%PTR%GET_DEVICE_DATA_RDWR(PFPLSN) + CALL FIELD_STATE%FIELDS_RWONLY(13)%PTR%GET_DEVICE_DATA_RDWR(PFHPSL) + CALL FIELD_STATE%FIELDS_RWONLY(14)%PTR%GET_DEVICE_DATA_RDWR(PFHPSN) CALL FIELD_STATE%TENDENCY_LOC%F_T%GET_DEVICE_DATA_RDWR(TEND_LOC_T) CALL FIELD_STATE%TENDENCY_LOC%F_Q%GET_DEVICE_DATA_RDWR(TEND_LOC_Q) CALL FIELD_STATE%TENDENCY_LOC%F_A%GET_DEVICE_DATA_RDWR(TEND_LOC_A) CALL FIELD_STATE%TENDENCY_LOC%F_CLD%GET_DEVICE_DATA_RDWR(TEND_LOC_CLD) + !$acc data copyin(yrecldp) deviceptr(PT, PQ,TEND_TMP_T,TEND_TMP_Q,& !$acc & TEND_TMP_A, TEND_TMP_CLD, TEND_LOC_T, TEND_LOC_Q, & !$acc & TEND_LOC_A, TEND_LOC_CLD, PVFA, PVFL, PVFI, & @@ -212,23 +213,23 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & CALL TIMER%THREAD_END(TID) - CALL FIELD_STATE%F_PLUDE%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RDONLY(14)%PTR%SYNC_HOST_RDWR() CALL FIELD_STATE%F_PCOVPTOT%SYNC_HOST_RDWR() CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFSQLF%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFSQIF%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFCQLNG%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFCQNNG%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFSQRF%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFSQSF%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFCQRNG%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFCQSNG%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFSQLTUR%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFSQITUR%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFPLSL%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFPLSN%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFHPSL%SYNC_HOST_RDWR() - CALL FIELD_STATE%F_PFHPSN%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(1)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(2)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(3)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(4)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(5)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(6)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(7)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(8)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(9)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(10)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(11)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(12)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(13)%PTR%SYNC_HOST_RDWR() + CALL FIELD_STATE%FIELDS_RWONLY(14)%PTR%SYNC_HOST_RDWR() CALL FIELD_STATE%TENDENCY_LOC%F_T%SYNC_HOST_RDWR() CALL FIELD_STATE%TENDENCY_LOC%F_Q%SYNC_HOST_RDWR() CALL FIELD_STATE%TENDENCY_LOC%F_A%SYNC_HOST_RDWR() diff --git a/src/common/module/cloudsc_field_state_mod.F90 b/src/common/module/cloudsc_field_state_mod.F90 index 599f00d2..e1dd4e62 100644 --- a/src/common/module/cloudsc_field_state_mod.F90 +++ b/src/common/module/cloudsc_field_state_mod.F90 @@ -20,7 +20,7 @@ MODULE CLOUDSC_FIELD_STATE_MOD USE EXPAND_MOD, ONLY: EXPAND, LOAD_AND_EXPAND, LOAD_AND_EXPAND_STATE, GET_OFFSETS USE VALIDATE_MOD, ONLY: VALIDATE USE CLOUDSC_MPI_MOD, ONLY: IRANK - USE FIELD_MODULE, ONLY: FIELD_2RB, FIELD_3RB, FIELD_4RB, FIELD_2IM, FIELD_2LM + USE FIELD_MODULE, ONLY: FIELD_2RB, FIELD_3RB, FIELD_4RB, FIELD_2IM, FIELD_2LM, FIELD_3RB_PTR USE FIELD_FACTORY_MODULE, ONLY: FIELD_NEW, FIELD_DELETE IMPLICIT NONE @@ -46,12 +46,11 @@ MODULE CLOUDSC_FIELD_STATE_MOD CLASS(FIELD_2RB), POINTER :: F_PRAINFRAC_TOPRFZ, F_PLSM CLASS(FIELD_2IM), POINTER :: F_KTYPE CLASS(FIELD_2LM), POINTER :: F_LDCUM - CLASS(FIELD_3RB), POINTER :: F_PLCRIT_AER, F_PICRIT_AER, F_PRE_ICE, F_PCCN, F_PNICE, F_PT, F_PQ, & - & F_PVFA, F_PVFL, F_PVFI, F_PDYNA, F_PDYNL, F_PDYNI, F_PHRSW, F_PHRLW, F_PVERVEL, F_PAP, F_PAPH, & - & F_PLU, F_PLUDE, F_PSNDE, F_PMFU, F_PMFD, F_PA, F_PSUPSAT, F_PCOVPTOT - CLASS(FIELD_3RB), POINTER :: F_PFSQLF, F_PFSQIF, F_PFCQLNG, F_PFCQNNG, F_PFSQRF, F_PFSQSF, F_PFCQRNG, & - & F_PFCQSNG, F_PFSQLTUR, F_PFSQITUR, F_PFPLSL, F_PFPLSN, F_PFHPSL, F_PFHPSN + CLASS(FIELD_3RB), POINTER :: F_PAPH, F_PCOVPTOT CLASS(FIELD_4RB), POINTER :: F_PCLV + + TYPE(FIELD_3RB_PTR), ALLOCATABLE :: FIELDS_RDONLY(:) + TYPE(FIELD_3RB_PTR), ALLOCATABLE :: FIELDS_RWONLY(:) CONTAINS PROCEDURE :: LOAD => CLOUDSC_FIELD_STATE_LOAD PROCEDURE :: VALIDATE => CLOUDSC_FIELD_STATE_VALIDATE @@ -65,63 +64,6 @@ MODULE CLOUDSC_FIELD_STATE_MOD CONTAINS - FUNCTION CREATE_FIELD_ALLOCATE_2D(SHAPE, ZERO) RESULT(FIELD_PTR) - CLASS(FIELD_2RB), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(2) - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: B - - CALL FIELD_NEW(FIELD_PTR, UBOUNDS=SHAPE, PERSISTENT=.TRUE.) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) THEN - !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) SCHEDULE(RUNTIME) - DO B=1, SHAPE(2) - FIELD_PTR%PTR(:,B) = 0.0_JPRB - END DO - !$OMP END PARALLEL DO - END IF - END IF - END FUNCTION CREATE_FIELD_ALLOCATE_2D - - FUNCTION CREATE_FIELD_ALLOCATE_3D(SHAPE, ZERO) RESULT(FIELD_PTR) - CLASS(FIELD_3RB), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(3) - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: B - - CALL FIELD_NEW(FIELD_PTR, UBOUNDS=SHAPE, PERSISTENT=.TRUE.) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) THEN - !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) SCHEDULE(RUNTIME) - DO B=1, SHAPE(3) - FIELD_PTR%PTR(:,:,B) = 0.0_JPRB - END DO - !$OMP END PARALLEL DO - END IF - END IF - END FUNCTION CREATE_FIELD_ALLOCATE_3D - - FUNCTION CREATE_FIELD_ALLOCATE_4D(SHAPE, ZERO) RESULT(FIELD_PTR) - CLASS(FIELD_4RB), POINTER :: FIELD_PTR - INTEGER(KIND=JPIM), INTENT(IN) :: SHAPE(4) - LOGICAL, OPTIONAL, INTENT(IN) :: ZERO - INTEGER(KIND=JPIM) :: B - - CALL FIELD_NEW(FIELD_PTR, UBOUNDS=SHAPE, PERSISTENT=.TRUE.) - - IF (PRESENT(ZERO)) THEN - IF (ZERO) THEN - !$OMP PARALLEL DO DEFAULT(SHARED), PRIVATE(B) SCHEDULE(RUNTIME) - DO B=1, SHAPE(4) - FIELD_PTR%PTR(:,:,:,B) = 0.0_JPRB - END DO - !$OMP END PARALLEL DO - END IF - END IF - END FUNCTION CREATE_FIELD_ALLOCATE_4D - ! FUNCTION CREATE_FIELD_WRAP_PACKED_3D(DATA, IDX) RESULT(FIELD_PTR) ! ! Create a single 3D field with implicit blocking dimension by wrapping existing data ! CLASS(FIELD_3D_WRAPPER_PACKED), POINTER :: FIELD_PTR @@ -138,12 +80,14 @@ SUBROUTINE LOAD_AND_EXPAND_FIELD_2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:) + REAL(KIND=JPRB), POINTER :: PTR(:,:) INTEGER(KIND=JPIM) :: START, END, SIZE CALL GET_OFFSETS(START, END, SIZE, NLON, 1, 1, NGPTOT, NGPTOTG) ALLOCATE(BUFFER(SIZE)) CALL LOAD_ARRAY(NAME, START, END, SIZE, NLON, BUFFER) - CALL EXPAND(BUFFER, FIELD%PTR, SIZE, NPROMA, NGPTOT, NBLOCKS) + CALL FIELD%GET_HOST_DATA_RDWR(PTR) + CALL EXPAND(BUFFER, PTR, SIZE, NPROMA, NGPTOT, NBLOCKS) DEALLOCATE(BUFFER) END SUBROUTINE LOAD_AND_EXPAND_FIELD_2D @@ -153,12 +97,14 @@ SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCK INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG INTEGER(KIND=JPIM), ALLOCATABLE :: BUFFER(:) + INTEGER(KIND=JPIM), POINTER :: PTR(:,:) INTEGER(KIND=JPIM) :: START, END, SIZE CALL GET_OFFSETS(START, END, SIZE, NLON, 1, 1, NGPTOT, NGPTOTG) ALLOCATE(BUFFER(SIZE)) CALL LOAD_ARRAY(NAME, START, END, SIZE, NLON, BUFFER) - CALL EXPAND(BUFFER, FIELD%PTR, SIZE, NPROMA, NGPTOT, NBLOCKS) + CALL FIELD%GET_HOST_DATA_RDWR(PTR) + CALL EXPAND(BUFFER, PTR, SIZE, NPROMA, NGPTOT, NBLOCKS) DEALLOCATE(BUFFER) END SUBROUTINE LOAD_AND_EXPAND_FIELD_INT2D @@ -168,12 +114,14 @@ SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCK INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG LOGICAL(KIND=JPLM), ALLOCATABLE :: BUFFER(:) + LOGICAL(KIND=JPLM), POINTER :: PTR(:,:) INTEGER(KIND=JPIM) :: START, END, SIZE CALL GET_OFFSETS(START, END, SIZE, NLON, 1, 1, NGPTOT, NGPTOTG) ALLOCATE(BUFFER(SIZE)) CALL LOAD_ARRAY(NAME, START, END, SIZE, NLON, BUFFER) - CALL EXPAND(BUFFER, FIELD%PTR, SIZE, NPROMA, NGPTOT, NBLOCKS) + CALL FIELD%GET_HOST_DATA_RDWR(PTR) + CALL EXPAND(BUFFER, PTR, SIZE, NPROMA, NGPTOT, NBLOCKS) DEALLOCATE(BUFFER) END SUBROUTINE LOAD_AND_EXPAND_FIELD_LOG2D @@ -183,12 +131,14 @@ SUBROUTINE LOAD_AND_EXPAND_FIELD_3D(NAME, FIELD, NLON, NLEV, NPROMA, NGPTOT, NBL INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NLEV, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:,:) + REAL(KIND=JPRB), POINTER :: PTR(:,:,:) INTEGER(KIND=JPIM) :: START, END, SIZE CALL GET_OFFSETS(START, END, SIZE, NLON, 1, NLEV, NGPTOT, NGPTOTG) ALLOCATE(BUFFER(SIZE, NLEV)) CALL LOAD_ARRAY(NAME, START, END, SIZE, NLON, NLEV, BUFFER) - CALL EXPAND(BUFFER, FIELD%PTR, SIZE, NPROMA, NLEV, NGPTOT, NBLOCKS) + CALL FIELD%GET_HOST_DATA_RDWR(PTR) + CALL EXPAND(BUFFER, PTR, SIZE, NPROMA, NLEV, NGPTOT, NBLOCKS) DEALLOCATE(BUFFER) END SUBROUTINE LOAD_AND_EXPAND_FIELD_3D @@ -198,12 +148,14 @@ SUBROUTINE LOAD_AND_EXPAND_FIELD_4D(NAME, FIELD, NLON, NLEV, NDIM, NPROMA, NGPTO INTEGER(KIND=JPIM), INTENT(IN) :: NLON, NLEV,NDIM, NPROMA, NGPTOT, NBLOCKS INTEGER(KIND=JPIM), INTENT(IN), OPTIONAL :: NGPTOTG REAL(KIND=JPRB), ALLOCATABLE :: BUFFER(:,:,:) + REAL(KIND=JPRB), POINTER :: PTR(:,:,:,:) INTEGER(KIND=JPIM) :: START, END, SIZE CALL GET_OFFSETS(START, END, SIZE, NLON, NDIM, NLEV, NGPTOT, NGPTOTG) ALLOCATE(BUFFER(SIZE, NLEV, NDIM)) CALL LOAD_ARRAY(NAME, START, END, SIZE, NLON, NLEV, NDIM, BUFFER) - CALL EXPAND(BUFFER, FIELD%PTR, SIZE, NPROMA, NLEV, NDIM, NGPTOT, NBLOCKS) + CALL FIELD%GET_HOST_DATA_RDWR(PTR) + CALL EXPAND(BUFFER, PTR, SIZE, NPROMA, NLEV, NDIM, NGPTOT, NBLOCKS) DEALLOCATE(BUFFER) END SUBROUTINE LOAD_AND_EXPAND_FIELD_4D @@ -304,6 +256,9 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL GET_OFFSETS(START, END, SIZE, KLON, SELF%KLEV, NCLV, NGPTOT, NGPTOTG) + ALLOCATE(SELF%FIELDS_RDONLY(24)) + ALLOCATE(SELF%FIELDS_RWONLY(14)) + IF (LLPACKED) THEN PRINT *, "Packed storage option not yet enabled" ERROR STOP @@ -369,30 +324,30 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) ! SELF%F_PFHPSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=14) ELSE - CALL FIELD_NEW(SELF%F_PT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PQ, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PVFA, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PVFL, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PVFI, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PDYNA, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PDYNL, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PDYNI, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PHRSW, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PHRLW, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PVERVEL, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PAP, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PLU, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PLUDE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PSNDE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PMFU, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PMFD, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PA, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PSUPSAT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PLCRIT_AER, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PICRIT_AER, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PRE_ICE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PCCN, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PNICE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(1)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(2)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(3)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(4)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(5)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(6)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(7)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(8)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(9)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(10)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(11)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(12)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(13)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(14)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(15)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(16)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(17)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(18)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(19)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(20)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(21)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(22)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(23)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(24)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) ! Custom fields that do not share shape or data type with the other blocks CALL FIELD_NEW(SELF%F_PAPH, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE.) @@ -400,66 +355,67 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL FIELD_NEW(SELF%F_LDCUM, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%F_KTYPE, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%F_PCLV, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%F_PCOVPTOT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - SELF%F_PRAINFRAC_TOPRFZ => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA,SELF%NBLOCKS], ZERO=.TRUE.) + CALL FIELD_NEW(SELF%F_PCOVPTOT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%F_PRAINFRAC_TOPRFZ, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) ! Allocate bulk buffers for output 3D fields - SELF%F_PFSQLF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFSQIF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFCQLNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFCQNNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFSQRF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFSQSF => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFCQRNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFCQSNG => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFSQLTUR => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFSQITUR => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFPLSL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFPLSN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFHPSL => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%F_PFHPSN => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], ZERO=.TRUE.) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(1)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(2)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(3)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(4)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(5)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(6)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(7)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(8)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(9)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(10)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(11)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(12)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(13)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%FIELDS_RWONLY(14)%PTR, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + END IF ! TODO: For now we treat all fields as single-allocations - SELF%TENDENCY_LOC%F_T => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%TENDENCY_LOC%F_A => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%TENDENCY_LOC%F_Q => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV,SELF%NBLOCKS], ZERO=.TRUE.) - SELF%TENDENCY_LOC%F_CLD => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], ZERO=.TRUE.) + CALL FIELD_NEW(SELF%TENDENCY_LOC%F_T, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%TENDENCY_LOC%F_A, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%TENDENCY_LOC%F_Q, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%TENDENCY_LOC%F_CLD, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) CALL FIELD_NEW(SELF%TENDENCY_TMP%F_T, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%TENDENCY_TMP%F_A, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%TENDENCY_TMP%F_Q, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%TENDENCY_TMP%F_CLD, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL LOAD_AND_EXPAND_FIELD_3D('PLCRIT_AER', SELF%F_PLCRIT_AER, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PICRIT_AER', SELF%F_PICRIT_AER, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PRE_ICE', SELF%F_PRE_ICE, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PCCN', SELF%F_PCCN, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PNICE', SELF%F_PNICE, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PT', SELF%F_PT, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PQ', SELF%F_PQ, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PVFA', SELF%F_PVFA, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PVFL', SELF%F_PVFL, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PVFI', SELF%F_PVFI, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PDYNA', SELF%F_PDYNA, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PDYNL', SELF%F_PDYNL, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PDYNI', SELF%F_PDYNI, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PHRSW', SELF%F_PHRSW, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PHRLW', SELF%F_PHRLW, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PVERVEL', SELF%F_PVERVEL, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PAP', SELF%F_PAP, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PLCRIT_AER', SELF%FIELDS_RDONLY(20)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PICRIT_AER', SELF%FIELDS_RDONLY(21)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PRE_ICE', SELF%FIELDS_RDONLY(22)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PCCN', SELF%FIELDS_RDONLY(23)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PNICE', SELF%FIELDS_RDONLY(24)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PT', SELF%FIELDS_RDONLY(1)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PQ', SELF%FIELDS_RDONLY(2)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PVFA', SELF%FIELDS_RDONLY(3)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PVFL', SELF%FIELDS_RDONLY(4)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PVFI', SELF%FIELDS_RDONLY(5)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PDYNA', SELF%FIELDS_RDONLY(6)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PDYNL', SELF%FIELDS_RDONLY(7)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PDYNI', SELF%FIELDS_RDONLY(8)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PHRSW', SELF%FIELDS_RDONLY(9)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PHRLW', SELF%FIELDS_RDONLY(10)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PVERVEL', SELF%FIELDS_RDONLY(11)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PAP', SELF%FIELDS_RDONLY(12)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('PAPH', SELF%F_PAPH, KLON, SELF%KLEV+1, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_2D('PLSM', SELF%F_PLSM, KLON, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_LOG2D('LDCUM', SELF%F_LDCUM, KLON, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_INT2D('KTYPE', SELF%F_KTYPE, KLON, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PLU', SELF%F_PLU, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PLUDE', SELF%F_PLUDE, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PSNDE', SELF%F_PSNDE, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PMFU', SELF%F_PMFU, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PMFD', SELF%F_PMFD, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PA', SELF%F_PA, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PLU', SELF%FIELDS_RDONLY(13)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PLUDE', SELF%FIELDS_RDONLY(14)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PSNDE', SELF%FIELDS_RDONLY(15)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PMFU', SELF%FIELDS_RDONLY(16)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PMFD', SELF%FIELDS_RDONLY(17)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PA', SELF%FIELDS_RDONLY(18)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_4D('PCLV', SELF%F_PCLV, KLON, SELF%KLEV, NCLV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PSUPSAT', SELF%F_PSUPSAT, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PSUPSAT', SELF%FIELDS_RDONLY(19)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('TENDENCY_TMP_T', SELF%TENDENCY_TMP%F_T, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('TENDENCY_TMP_A', SELF%TENDENCY_TMP%F_A, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) @@ -543,23 +499,23 @@ SUBROUTINE CLOUDSC_FIELD_STATE_VALIDATE(SELF, NPROMA, NGPTOT, NGPTOTG) CALL INPUT_FINALIZE() ! Actual variable validation - CALL VALIDATE('PLUDE', PLUDE, SELF%F_PLUDE%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PLUDE', PLUDE, SELF%FIELDS_RDONLY(14)%PTR%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('PCOVPTOT', PCOVPTOT, SELF%F_PCOVPTOT%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('PRAINFRAC_TOPRFZ', PRAINFRAC_TOPRFZ, SELF%F_PRAINFRAC_TOPRFZ%PTR, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFSQLF', PFSQLF, SELF%F_PFSQLF%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFSQIF', PFSQIF, SELF%F_PFSQIF%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFCQLNG', PFCQLNG, SELF%F_PFCQLNG%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFCQNNG', PFCQNNG, SELF%F_PFCQNNG%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFSQRF', PFSQRF, SELF%F_PFSQRF%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFSQSF', PFSQSF, SELF%F_PFSQSF%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFCQRNG', PFCQRNG, SELF%F_PFCQRNG%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFCQSNG', PFCQSNG, SELF%F_PFCQSNG%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFSQLTUR', PFSQLTUR, SELF%F_PFSQLTUR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFSQITUR', PFSQITUR, SELF%F_PFSQITUR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFPLSL', PFPLSL, SELF%F_PFPLSL%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFPLSN', PFPLSN, SELF%F_PFPLSN%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFHPSL', PFHPSL, SELF%F_PFHPSL%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) - CALL VALIDATE('PFHPSN', PFHPSN, SELF%F_PFHPSN%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFSQLF', PFSQLF, SELF%FIELDS_RWONLY(1)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFSQIF', PFSQIF, SELF%FIELDS_RWONLY(2)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFCQLNG', PFCQLNG, SELF%FIELDS_RWONLY(3)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFCQNNG', PFCQNNG, SELF%FIELDS_RWONLY(4)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFSQRF', PFSQRF, SELF%FIELDS_RWONLY(5)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFSQSF', PFSQSF, SELF%FIELDS_RWONLY(6)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFCQRNG', PFCQRNG, SELF%FIELDS_RWONLY(7)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFCQSNG', PFCQSNG, SELF%FIELDS_RWONLY(8)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFSQLTUR', PFSQLTUR, SELF%FIELDS_RWONLY(9)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFSQITUR', PFSQITUR, SELF%FIELDS_RWONLY(10)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFPLSL', PFPLSL, SELF%FIELDS_RWONLY(11)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFPLSN', PFPLSN, SELF%FIELDS_RWONLY(12)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFHPSL', PFHPSL, SELF%FIELDS_RWONLY(13)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PFHPSN', PFHPSN, SELF%FIELDS_RWONLY(14)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('TENDENCY_LOC%A', B_LOC(:,:,2,:), SELF%TENDENCY_LOC%F_A%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('TENDENCY_LOC%Q', B_LOC(:,:,3,:), SELF%TENDENCY_LOC%F_Q%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) @@ -573,34 +529,14 @@ SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE(SELF, USE_PACKED) CLASS(CLOUDSC_FIELD_STATE) :: SELF ! Use this toggle to switch between standalone fields and bulk-allocated ones. LOGICAL, INTENT(IN), OPTIONAL :: USE_PACKED + INTEGER :: IFIELD IF(USE_PACKED)THEN ELSE - CALL FIELD_DELETE(SELF%F_PT) - CALL FIELD_DELETE(SELF%F_PQ) - CALL FIELD_DELETE(SELF%F_PVFA) - CALL FIELD_DELETE(SELF%F_PVFL) - CALL FIELD_DELETE(SELF%F_PVFI) - CALL FIELD_DELETE(SELF%F_PDYNA) - CALL FIELD_DELETE(SELF%F_PDYNL) - CALL FIELD_DELETE(SELF%F_PDYNI) - CALL FIELD_DELETE(SELF%F_PHRSW) - CALL FIELD_DELETE(SELF%F_PHRLW) - CALL FIELD_DELETE(SELF%F_PVERVEL) - CALL FIELD_DELETE(SELF%F_PAP) - CALL FIELD_DELETE(SELF%F_PLU) - CALL FIELD_DELETE(SELF%F_PLUDE) - CALL FIELD_DELETE(SELF%F_PSNDE) - CALL FIELD_DELETE(SELF%F_PMFU) - CALL FIELD_DELETE(SELF%F_PMFD) - CALL FIELD_DELETE(SELF%F_PA) - CALL FIELD_DELETE(SELF%F_PSUPSAT) - CALL FIELD_DELETE(SELF%F_PLCRIT_AER) - CALL FIELD_DELETE(SELF%F_PICRIT_AER) - CALL FIELD_DELETE(SELF%F_PRE_ICE) - CALL FIELD_DELETE(SELF%F_PCCN) - CALL FIELD_DELETE(SELF%F_PNICE) + DO IFIELD=1,24 + CALL FIELD_DELETE(SELF%FIELDS_RDONLY(IFIELD)%PTR) + ENDDO CALL FIELD_DELETE(SELF%F_PAPH) CALL FIELD_DELETE(SELF%F_PLSM) @@ -610,20 +546,9 @@ SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE(SELF, USE_PACKED) CALL FIELD_DELETE(SELF%F_PCOVPTOT) CALL FIELD_DELETE(SELF%F_PRAINFRAC_TOPRFZ) - CALL FIELD_DELETE(SELF%F_PFSQLF) - CALL FIELD_DELETE(SELF%F_PFSQIF) - CALL FIELD_DELETE(SELF%F_PFCQLNG) - CALL FIELD_DELETE(SELF%F_PFCQNNG) - CALL FIELD_DELETE(SELF%F_PFSQRF) - CALL FIELD_DELETE(SELF%F_PFSQSF) - CALL FIELD_DELETE(SELF%F_PFCQRNG) - CALL FIELD_DELETE(SELF%F_PFCQSNG) - CALL FIELD_DELETE(SELF%F_PFSQLTUR) - CALL FIELD_DELETE(SELF%F_PFSQITUR) - CALL FIELD_DELETE(SELF%F_PFPLSL) - CALL FIELD_DELETE(SELF%F_PFPLSN) - CALL FIELD_DELETE(SELF%F_PFHPSL) - CALL FIELD_DELETE(SELF%F_PFHPSN) + DO IFIELD=1,14 + CALL FIELD_DELETE(SELF%FIELDS_RWONLY(IFIELD)%PTR) + ENDDO CALL FIELD_DELETE(SELF%TENDENCY_LOC%F_T) CALL FIELD_DELETE(SELF%TENDENCY_LOC%F_A) @@ -636,6 +561,9 @@ SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE(SELF, USE_PACKED) CALL FIELD_DELETE(SELF%TENDENCY_TMP%F_CLD) ENDIF + DEALLOCATE(SELF%FIELDS_RDONLY) + DEALLOCATE(SELF%FIELDS_RWONLY) + END SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE END MODULE CLOUDSC_FIELD_STATE_MOD From ae1b903966a7ef2a94c31aa3c70ab40480e04ba4 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Thu, 23 Nov 2023 13:22:15 +0000 Subject: [PATCH 140/174] FIELD_API fixes for single-precision --- CMakeLists.txt | 2 ++ bundle.yml | 2 ++ src/common/CMakeLists.txt | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee1533a9..f51f915b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,8 +117,10 @@ ecbuild_find_package( NAME atlas ) ecbuild_add_option( FEATURE SINGLE_PRECISION DESCRIPTION "Build CLOUDSC in single precision" DEFAULT OFF ) +set(prec dp) if( HAVE_SINGLE_PRECISION ) list(APPEND CLOUDSC_DEFINITIONS SINGLE) + set(prec sp) endif() # build executables diff --git a/bundle.yml b/bundle.yml index ac985b09..8eac0650 100644 --- a/bundle.yml +++ b/bundle.yml @@ -90,9 +90,11 @@ options : cmake : CMAKE_TOOLCHAIN_FILE={{value}} - single-precision : + # Disabling DOUBLE_PRECISION only affects field_api help : Enable single precision build of the dwarf cmake : > ENABLE_SINGLE_PRECISION=ON + ENABLE_DOUBLE_PRECISION=OFF FIELD_API_DEFINITIONS=SINGLE - with-gpu : diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index f5c11dc0..0ed2c3e1 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -94,7 +94,7 @@ ecbuild_add_library( TARGET cloudsc-common-lib $<${HAVE_MPI}:MPI::MPI_Fortran> $<${HAVE_HDF5}:hdf5::hdf5_fortran> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_Fortran> - $<${HAVE_FIELD_API}:field_api_dp> # FIELD_API uses parkind1 from CLOUDSC, so both dp and sp variants are identical + $<${HAVE_FIELD_API}:field_api_${prec}> ) if( HAVE_CUDA ) From ec8145b031e74a9aef5b43edcad5dce7dbf14e75 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Wed, 13 Sep 2023 13:08:11 +0000 Subject: [PATCH 141/174] Re-enabled CLOUDSC_PACKED_STORAGE option --- .../cloudsc_driver_gpu_scc_field_mod.F90 | 40 +++-- src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 | 2 +- src/common/module/cloudsc_field_state_mod.F90 | 146 +++++++----------- 3 files changed, 82 insertions(+), 106 deletions(-) diff --git a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 index 59aefbe7..220f6363 100644 --- a/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 +++ b/src/cloudsc_gpu/cloudsc_driver_gpu_scc_field_mod.F90 @@ -24,7 +24,7 @@ MODULE CLOUDSC_DRIVER_GPU_SCC_FIELD_MOD CONTAINS SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & - & NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, FIELD_STATE & + & NUMOMP, NPROMA, NLEV, NGPTOT, NGPBLKS, NGPTOTG, KFLDX, PTSPHY, FIELD_STATE, USE_PACKED & & ) ! Driver routine that invokes the optimized CLAW-based CLOUDSC GPU kernel @@ -32,6 +32,7 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & INTEGER(KIND=JPIM) :: KFLDX REAL(KIND=JPRB) :: PTSPHY ! Physics timestep TYPE(CLOUDSC_FIELD_STATE), INTENT(INOUT) :: FIELD_STATE + LOGICAL, INTENT(IN) :: USE_PACKED REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PT(:,:,:) ! T at start of callpar REAL(KIND=JPRB), POINTER, CONTIGUOUS :: PQ(:,:,:) ! Q at start of callpar @@ -102,6 +103,13 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & ! Global timer for the parallel region CALL TIMER%START(NUMOMP) + IF(USE_PACKED)THEN + CALL FIELD_STATE%DATA_RDONLY%SYNC_DEVICE_RDONLY() + CALL FIELD_STATE%DATA_RWONLY%SYNC_DEVICE_RDWR() + ! If this is called then the subsequent FIELD_STATE%FIELDS_RDONLY/RWONLY%PTR%GET_DEVICE_DATA() + ! calls don't trigger any data movement, they just return an updated device pointer + ENDIF + CALL FIELD_STATE%FIELDS_RDONLY(1)%PTR%GET_DEVICE_DATA_RDONLY(PT) CALL FIELD_STATE%FIELDS_RDONLY(2)%PTR%GET_DEVICE_DATA_RDONLY(PQ) CALL FIELD_STATE%FIELDS_RDONLY(3)%PTR%GET_DEVICE_DATA_RDONLY(PVFA) @@ -119,23 +127,23 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & CALL FIELD_STATE%F_LDCUM%GET_DEVICE_DATA_RDONLY(LDCUM) CALL FIELD_STATE%F_KTYPE%GET_DEVICE_DATA_RDONLY(KTYPE) CALL FIELD_STATE%FIELDS_RDONLY(13)%PTR%GET_DEVICE_DATA_RDONLY(PLU) - CALL FIELD_STATE%FIELDS_RDONLY(15)%PTR%GET_DEVICE_DATA_RDONLY(PSNDE) - CALL FIELD_STATE%FIELDS_RDONLY(16)%PTR%GET_DEVICE_DATA_RDONLY(PMFU) - CALL FIELD_STATE%FIELDS_RDONLY(17)%PTR%GET_DEVICE_DATA_RDONLY(PMFD) - CALL FIELD_STATE%FIELDS_RDONLY(18)%PTR%GET_DEVICE_DATA_RDONLY(PA) + CALL FIELD_STATE%FIELDS_RDONLY(14)%PTR%GET_DEVICE_DATA_RDONLY(PSNDE) + CALL FIELD_STATE%FIELDS_RDONLY(15)%PTR%GET_DEVICE_DATA_RDONLY(PMFU) + CALL FIELD_STATE%FIELDS_RDONLY(16)%PTR%GET_DEVICE_DATA_RDONLY(PMFD) + CALL FIELD_STATE%FIELDS_RDONLY(17)%PTR%GET_DEVICE_DATA_RDONLY(PA) CALL FIELD_STATE%F_PCLV%GET_DEVICE_DATA_RDONLY(PCLV) - CALL FIELD_STATE%FIELDS_RDONLY(19)%PTR%GET_DEVICE_DATA_RDONLY(PSUPSAT) - CALL FIELD_STATE%FIELDS_RDONLY(20)%PTR%GET_DEVICE_DATA_RDONLY(PLCRIT_AER) - CALL FIELD_STATE%FIELDS_RDONLY(21)%PTR%GET_DEVICE_DATA_RDONLY(PICRIT_AER) - CALL FIELD_STATE%FIELDS_RDONLY(22)%PTR%GET_DEVICE_DATA_RDONLY(PRE_ICE) - CALL FIELD_STATE%FIELDS_RDONLY(23)%PTR%GET_DEVICE_DATA_RDONLY(PCCN) - CALL FIELD_STATE%FIELDS_RDONLY(24)%PTR%GET_DEVICE_DATA_RDONLY(PNICE) + CALL FIELD_STATE%FIELDS_RDONLY(18)%PTR%GET_DEVICE_DATA_RDONLY(PSUPSAT) + CALL FIELD_STATE%FIELDS_RDONLY(19)%PTR%GET_DEVICE_DATA_RDONLY(PLCRIT_AER) + CALL FIELD_STATE%FIELDS_RDONLY(20)%PTR%GET_DEVICE_DATA_RDONLY(PICRIT_AER) + CALL FIELD_STATE%FIELDS_RDONLY(21)%PTR%GET_DEVICE_DATA_RDONLY(PRE_ICE) + CALL FIELD_STATE%FIELDS_RDONLY(22)%PTR%GET_DEVICE_DATA_RDONLY(PCCN) + CALL FIELD_STATE%FIELDS_RDONLY(23)%PTR%GET_DEVICE_DATA_RDONLY(PNICE) CALL FIELD_STATE%TENDENCY_TMP%F_T%GET_DEVICE_DATA_RDONLY(TEND_TMP_T) CALL FIELD_STATE%TENDENCY_TMP%F_Q%GET_DEVICE_DATA_RDONLY(TEND_TMP_Q) CALL FIELD_STATE%TENDENCY_TMP%F_A%GET_DEVICE_DATA_RDONLY(TEND_TMP_A) CALL FIELD_STATE%TENDENCY_TMP%F_CLD%GET_DEVICE_DATA_RDONLY(TEND_TMP_CLD) - CALL FIELD_STATE%FIELDS_RDONLY(14)%PTR%GET_DEVICE_DATA_RDWR(PLUDE) + CALL FIELD_STATE%F_PLUDE%GET_DEVICE_DATA_RDWR(PLUDE) CALL FIELD_STATE%F_PCOVPTOT%GET_DEVICE_DATA_RDWR(PCOVPTOT) CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%GET_DEVICE_DATA_RDWR(PRAINFRAC_TOPRFZ) CALL FIELD_STATE%FIELDS_RWONLY(1)%PTR%GET_DEVICE_DATA_RDWR(PFSQLF) @@ -213,7 +221,13 @@ SUBROUTINE CLOUDSC_DRIVER_GPU_SCC_FIELD( & CALL TIMER%THREAD_END(TID) - CALL FIELD_STATE%FIELDS_RDONLY(14)%PTR%SYNC_HOST_RDWR() + IF(USE_PACKED)THEN + CALL FIELD_STATE%DATA_RWONLY%SYNC_HOST_RDWR() + ! If this is called then the subsequent FIELD_STATE%FIELDS_RWONLY%PTR%SYNC_HOST_RDWR() calls + ! don't trigger any data movement + ENDIF + + CALL FIELD_STATE%F_PLUDE%SYNC_HOST_RDWR() CALL FIELD_STATE%F_PCOVPTOT%SYNC_HOST_RDWR() CALL FIELD_STATE%F_PRAINFRAC_TOPRFZ%SYNC_HOST_RDWR() CALL FIELD_STATE%FIELDS_RWONLY(1)%PTR%SYNC_HOST_RDWR() diff --git a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 index 04f085fe..17de8ae8 100644 --- a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 +++ b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 @@ -285,7 +285,7 @@ PROGRAM DWARF_CLOUDSC ! Call the driver to perform the parallel loop over our kernel CALL CLOUDSC_DRIVER_GPU_SCC_FIELD( & & NUMOMP, NPROMA, GLOBAL_STATE%KLEV, NGPTOT, GLOBAL_STATE%NBLOCKS, NGPTOTG, & - & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, GLOBAL_STATE & + & GLOBAL_STATE%KFLDX, GLOBAL_STATE%PTSPHY, GLOBAL_STATE, USE_PACKED & & ) #endif diff --git a/src/common/module/cloudsc_field_state_mod.F90 b/src/common/module/cloudsc_field_state_mod.F90 index e1dd4e62..ebe7fa98 100644 --- a/src/common/module/cloudsc_field_state_mod.F90 +++ b/src/common/module/cloudsc_field_state_mod.F90 @@ -39,14 +39,14 @@ MODULE CLOUDSC_FIELD_STATE_MOD REAL(KIND=JPRB), ALLOCATABLE :: B_TMP(:,:,:,:) REAL(KIND=JPRB), ALLOCATABLE :: B_LOC(:,:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA_RDONLY(:,:,:,:) - REAL(KIND=JPRB), POINTER, CONTIGUOUS :: DATA_RWONLY(:,:,:,:) + CLASS(FIELD_4RB), POINTER :: DATA_RDONLY + CLASS(FIELD_4RB), POINTER :: DATA_RWONLY ! Storage fields to provide thread-local views CLASS(FIELD_2RB), POINTER :: F_PRAINFRAC_TOPRFZ, F_PLSM CLASS(FIELD_2IM), POINTER :: F_KTYPE CLASS(FIELD_2LM), POINTER :: F_LDCUM - CLASS(FIELD_3RB), POINTER :: F_PAPH, F_PCOVPTOT + CLASS(FIELD_3RB), POINTER :: F_PAPH, F_PCOVPTOT, F_PLUDE CLASS(FIELD_4RB), POINTER :: F_PCLV TYPE(FIELD_3RB_PTR), ALLOCATABLE :: FIELDS_RDONLY(:) @@ -64,16 +64,6 @@ MODULE CLOUDSC_FIELD_STATE_MOD CONTAINS -! FUNCTION CREATE_FIELD_WRAP_PACKED_3D(DATA, IDX) RESULT(FIELD_PTR) -! ! Create a single 3D field with implicit blocking dimension by wrapping existing data -! CLASS(FIELD_3D_WRAPPER_PACKED), POINTER :: FIELD_PTR -! REAL(KIND=JPRB), TARGET, INTENT(IN) :: DATA(:,:,:,:) -! INTEGER(KIND=JPIM), INTENT(IN) :: IDX -! -! ALLOCATE(FIELD_PTR) -! CALL FIELD_PTR%INIT(DATA=DATA, IDX=IDX) -! END FUNCTION CREATE_FIELD_WRAP_PACKED_3D - SUBROUTINE LOAD_AND_EXPAND_FIELD_2D(NAME, FIELD, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CHARACTER(LEN=*), INTENT(IN) :: NAME CLASS(FIELD_2RB), INTENT(INOUT) :: FIELD @@ -256,74 +246,33 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL GET_OFFSETS(START, END, SIZE, KLON, SELF%KLEV, NCLV, NGPTOT, NGPTOTG) - ALLOCATE(SELF%FIELDS_RDONLY(24)) - ALLOCATE(SELF%FIELDS_RWONLY(14)) - IF (LLPACKED) THEN - PRINT *, "Packed storage option not yet enabled" - ERROR STOP + ! Allocate bulk buffers for read-only input 3D fields + NFIELDS = 23 + CALL FIELD_NEW(SELF%DATA_RDONLY, SELF%FIELDS_RDONLY, UBOUNDS=[NPROMA, SELF%KLEV, NFIELDS, SELF%NBLOCKS], & + & PERSISTENT=.TRUE.) -! ! Allocate bulk buffers for read-only input 3D fields -! NFIELDS = 24 -! ! ALLOCATE(SELF%DATA_RDONLY(NPROMA, SELF%KLEV, NFIELDS, SELF%NBLOCKS)) -! SELF%DATA_RDONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV, NFIELDS], SELF%NBLOCKS) -! -! SELF%F_PT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=1) -! SELF%F_PQ => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=2) -! SELF%F_PVFA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=3) -! SELF%F_PVFL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=4) -! SELF%F_PVFI => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=5) -! SELF%F_PDYNA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=6) -! SELF%F_PDYNL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=7) -! SELF%F_PDYNI => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=8) -! SELF%F_PHRSW => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=9) -! SELF%F_PHRLW => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=10) -! SELF%F_PVERVEL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=11) -! SELF%F_PAP => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=12) -! SELF%F_PLU => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=13) -! SELF%F_PLUDE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=14) -! SELF%F_PSNDE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=15) -! SELF%F_PMFU => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=16) -! SELF%F_PMFD => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=17) -! SELF%F_PA => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=18) -! SELF%F_PSUPSAT => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=19) -! SELF%F_PLCRIT_AER => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=20) -! SELF%F_PICRIT_AER => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=21) -! SELF%F_PRE_ICE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=22) -! SELF%F_PCCN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=23) -! SELF%F_PNICE => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RDONLY, IDX=24) -! -! ! Custom fields that do not share shape or data type with the other blocks -! SELF%F_PAPH => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV+1], NBLOCKS=SELF%NBLOCKS) -! SELF%F_PLSM => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) -! SELF%F_LDCUM => CREATE_FIELD_ALLOCATE_LOG2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) -! SELF%F_KTYPE => CREATE_FIELD_ALLOCATE_INT2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS) -! SELF%F_PCLV => CREATE_FIELD_ALLOCATE_4D(SHAPE=[NPROMA,SELF%KLEV,NCLV], NBLOCKS=SELF%NBLOCKS) -! SELF%F_PCOVPTOT => CREATE_FIELD_ALLOCATE_3D(SHAPE=[NPROMA,SELF%KLEV], NBLOCKS=SELF%NBLOCKS) -! SELF%F_PRAINFRAC_TOPRFZ => CREATE_FIELD_ALLOCATE_2D(SHAPE=[NPROMA], NBLOCKS=SELF%NBLOCKS, ZERO=.TRUE.) -! -! ! Allocate bulk buffers for output 3D fields -! NFIELDS = 14 -! ! CALL FIELD_INIT(SELF%DATA_RWONLY, NPROMA, SELF%KLEV+1, NFIELDS, SELF%NBLOCKS) -! SELF%DATA_RWONLY => MALLOC_HOST_PINNED_4D([NPROMA, SELF%KLEV+1, NFIELDS], SELF%NBLOCKS) -! SELF%DATA_RWONLY(:,:,:,:) = 0.0_JPRB -! -! SELF%F_PFSQLF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=1) -! SELF%F_PFSQIF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=2) -! SELF%F_PFCQLNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=3) -! SELF%F_PFCQNNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=4) -! SELF%F_PFSQRF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=5) -! SELF%F_PFSQSF => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=6) -! SELF%F_PFCQRNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=7) -! SELF%F_PFCQSNG => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=8) -! SELF%F_PFSQLTUR => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=9) -! SELF%F_PFSQITUR => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=10) -! SELF%F_PFPLSL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=11) -! SELF%F_PFPLSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=12) -! SELF%F_PFHPSL => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=13) -! SELF%F_PFHPSN => CREATE_FIELD_WRAP_PACKED_3D(DATA=SELF%DATA_RWONLY, IDX=14) + ! This is a RDWR field, so does not belong in either of the buffers + CALL FIELD_NEW(SELF%F_PLUDE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + + ! Custom fields that do not share shape or data type with the other blocks + CALL FIELD_NEW(SELF%F_PAPH, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PLSM, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_LDCUM, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_KTYPE, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PCLV, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + CALL FIELD_NEW(SELF%F_PCOVPTOT, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + CALL FIELD_NEW(SELF%F_PRAINFRAC_TOPRFZ, UBOUNDS=[NPROMA,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) + + ! Allocate bulk buffers for output 3D fields + NFIELDS = 14 + CALL FIELD_NEW(SELF%DATA_RWONLY, SELF%FIELDS_RWONLY, UBOUNDS=[NPROMA, SELF%KLEV+1, NFIELDS, SELF%NBLOCKS], & + & PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) ELSE + ALLOCATE(SELF%FIELDS_RDONLY(23)) + ALLOCATE(SELF%FIELDS_RWONLY(14)) + CALL FIELD_NEW(SELF%FIELDS_RDONLY(1)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%FIELDS_RDONLY(2)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%FIELDS_RDONLY(3)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) @@ -347,7 +296,9 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL FIELD_NEW(SELF%FIELDS_RDONLY(21)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%FIELDS_RDONLY(22)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%FIELDS_RDONLY(23)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL FIELD_NEW(SELF%FIELDS_RDONLY(24)%PTR, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) + + ! This is a RDWR field, so does not belong in either of the buffers + CALL FIELD_NEW(SELF%F_PLUDE, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE., INIT_VALUE=0._JPRB) ! Custom fields that do not share shape or data type with the other blocks CALL FIELD_NEW(SELF%F_PAPH, UBOUNDS=[NPROMA,SELF%KLEV+1,SELF%NBLOCKS], PERSISTENT=.TRUE.) @@ -387,11 +338,11 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL FIELD_NEW(SELF%TENDENCY_TMP%F_Q, UBOUNDS=[NPROMA,SELF%KLEV,SELF%NBLOCKS], PERSISTENT=.TRUE.) CALL FIELD_NEW(SELF%TENDENCY_TMP%F_CLD, UBOUNDS=[NPROMA,SELF%KLEV,NCLV,SELF%NBLOCKS], PERSISTENT=.TRUE.) - CALL LOAD_AND_EXPAND_FIELD_3D('PLCRIT_AER', SELF%FIELDS_RDONLY(20)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PICRIT_AER', SELF%FIELDS_RDONLY(21)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PRE_ICE', SELF%FIELDS_RDONLY(22)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PCCN', SELF%FIELDS_RDONLY(23)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PNICE', SELF%FIELDS_RDONLY(24)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PLCRIT_AER', SELF%FIELDS_RDONLY(19)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PICRIT_AER', SELF%FIELDS_RDONLY(20)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PRE_ICE', SELF%FIELDS_RDONLY(21)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PCCN', SELF%FIELDS_RDONLY(22)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PNICE', SELF%FIELDS_RDONLY(23)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('PT', SELF%FIELDS_RDONLY(1)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('PQ', SELF%FIELDS_RDONLY(2)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('PVFA', SELF%FIELDS_RDONLY(3)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) @@ -409,13 +360,13 @@ SUBROUTINE CLOUDSC_FIELD_STATE_LOAD(SELF, NPROMA, NGPTOT, NGPTOTG, USE_PACKED) CALL LOAD_AND_EXPAND_FIELD_LOG2D('LDCUM', SELF%F_LDCUM, KLON, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_INT2D('KTYPE', SELF%F_KTYPE, KLON, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('PLU', SELF%FIELDS_RDONLY(13)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PLUDE', SELF%FIELDS_RDONLY(14)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PSNDE', SELF%FIELDS_RDONLY(15)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PMFU', SELF%FIELDS_RDONLY(16)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PMFD', SELF%FIELDS_RDONLY(17)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PA', SELF%FIELDS_RDONLY(18)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PLUDE', SELF%F_PLUDE, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PSNDE', SELF%FIELDS_RDONLY(14)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PMFU', SELF%FIELDS_RDONLY(15)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PMFD', SELF%FIELDS_RDONLY(16)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PA', SELF%FIELDS_RDONLY(17)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_4D('PCLV', SELF%F_PCLV, KLON, SELF%KLEV, NCLV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) - CALL LOAD_AND_EXPAND_FIELD_3D('PSUPSAT', SELF%FIELDS_RDONLY(19)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) + CALL LOAD_AND_EXPAND_FIELD_3D('PSUPSAT', SELF%FIELDS_RDONLY(18)%PTR, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('TENDENCY_TMP_T', SELF%TENDENCY_TMP%F_T, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) CALL LOAD_AND_EXPAND_FIELD_3D('TENDENCY_TMP_A', SELF%TENDENCY_TMP%F_A, KLON, SELF%KLEV, NPROMA, NGPTOT, SELF%NBLOCKS, NGPTOTG) @@ -499,7 +450,7 @@ SUBROUTINE CLOUDSC_FIELD_STATE_VALIDATE(SELF, NPROMA, NGPTOT, NGPTOTG) CALL INPUT_FINALIZE() ! Actual variable validation - CALL VALIDATE('PLUDE', PLUDE, SELF%FIELDS_RDONLY(14)%PTR%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) + CALL VALIDATE('PLUDE', PLUDE, SELF%F_PLUDE%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('PCOVPTOT', PCOVPTOT, SELF%F_PCOVPTOT%PTR, NPROMA, SELF%KLEV, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('PRAINFRAC_TOPRFZ', PRAINFRAC_TOPRFZ, SELF%F_PRAINFRAC_TOPRFZ%PTR, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CALL VALIDATE('PFSQLF', PFSQLF, SELF%FIELDS_RWONLY(1)%PTR%PTR, NPROMA, SELF%KLEV+1, NGPTOT, NBLOCKS, NGPTOTG) @@ -528,16 +479,27 @@ SUBROUTINE CLOUDSC_FIELD_STATE_FINALIZE(SELF, USE_PACKED) ! Validate the correctness of output against reference data CLASS(CLOUDSC_FIELD_STATE) :: SELF ! Use this toggle to switch between standalone fields and bulk-allocated ones. - LOGICAL, INTENT(IN), OPTIONAL :: USE_PACKED + LOGICAL, INTENT(IN) :: USE_PACKED INTEGER :: IFIELD IF(USE_PACKED)THEN + CALL FIELD_DELETE(SELF%DATA_RDONLY) + CALL FIELD_DELETE(SELF%DATA_RWONLY) + CALL FIELD_DELETE(SELF%F_PLUDE) + CALL FIELD_DELETE(SELF%F_PAPH) + CALL FIELD_DELETE(SELF%F_PLSM) + CALL FIELD_DELETE(SELF%F_LDCUM) + CALL FIELD_DELETE(SELF%F_KTYPE) + CALL FIELD_DELETE(SELF%F_PCLV) + CALL FIELD_DELETE(SELF%F_PCOVPTOT) + CALL FIELD_DELETE(SELF%F_PRAINFRAC_TOPRFZ) ELSE - DO IFIELD=1,24 + DO IFIELD=1,23 CALL FIELD_DELETE(SELF%FIELDS_RDONLY(IFIELD)%PTR) ENDDO + CALL FIELD_DELETE(SELF%F_PLUDE) CALL FIELD_DELETE(SELF%F_PAPH) CALL FIELD_DELETE(SELF%F_PLSM) CALL FIELD_DELETE(SELF%F_LDCUM) From 28ea447c7ca86cc60eafbae3097acb1cada171c9 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Fri, 2 Feb 2024 11:51:31 +0000 Subject: [PATCH 142/174] Update README with link to open-source FIELD_API --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a217b2dd..565da03e 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) C version of CLOUDSC including loop fusion and temporary local array demotion. - **dwarf-cloudsc-gpu-scc-field**: GPU-enabled and optimized version of - CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://git.ecmwf.int/projects/RDX/repos/field_api/browse) (a Fortran library purpose-built for IFS data-structures that facilitates the + CLOUDSC that uses the SCC loop layout, and uses [FIELD API](https://github.com/ecmwf-ifs/field_api) (a Fortran library purpose-built for IFS data-structures that facilitates the creation and management of field objects in scientific code) to perform device offload and copyback. The intent is to demonstrate the explicit use of pinned host memory to speed-up data transfers, as provided by the shipped prototype implmentation, and From b14db4e9e7590ee1f583f6ebaf2fa62aed578c5b Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 5 Feb 2024 10:56:27 +0000 Subject: [PATCH 143/174] Set ENABLE_SINGLE_PRECISION=OFF as a global option --- bundle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle.yml b/bundle.yml index 8eac0650..aa74ffce 100644 --- a/bundle.yml +++ b/bundle.yml @@ -11,6 +11,7 @@ cmake : > BUILD_fckit=OFF BUILD_atlas=OFF ENABLE_OMP=ON + ENABLE_SINGLE_PRECISION=OFF projects : @@ -74,7 +75,6 @@ projects : optional: true require : ecbuild cmake : > - ENABLE_SINGLE_PRECISION=OFF UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module - cloudsc-dwarf : From 39d26be8f7c4b95182bd04e847224f42e5165805 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 5 Feb 2024 11:35:49 +0000 Subject: [PATCH 144/174] Don't use FCKIT provided fypp to build FIELD_API --- bundle.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bundle.yml b/bundle.yml index aa74ffce..9f57c878 100644 --- a/bundle.yml +++ b/bundle.yml @@ -52,6 +52,14 @@ projects : ECKIT_ENABLE_BUILD_TOOLS=OFF ECKIT_ENABLE_CUDA=OFF + - field_api : + git : https://github.com/ecmwf-ifs/field_api.git + version : main + optional: true + require : ecbuild + cmake : > + UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module + - fckit : git : https://github.com/ecmwf/fckit version : 0.11.0 @@ -69,14 +77,6 @@ projects : ATLAS_ENABLE_TESTS=OFF ATLAS_ENABLE_CUDA=OFF - - field_api : - git : https://github.com/ecmwf-ifs/field_api.git - version : main - optional: true - require : ecbuild - cmake : > - UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module - - cloudsc-dwarf : # The CLOUDSC dwarf project with multiple implementations dir : $PWD From 9516da38a8cb0cf6a99b6487eedf2f36e223dda2 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 5 Feb 2024 16:31:15 +0000 Subject: [PATCH 145/174] Use -acc=gpu to build offloaded variants in CI --- arch/toolchains/github-ubuntu-nvhpc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/toolchains/github-ubuntu-nvhpc.cmake b/arch/toolchains/github-ubuntu-nvhpc.cmake index 8e5b0d9f..ebb783d7 100644 --- a/arch/toolchains/github-ubuntu-nvhpc.cmake +++ b/arch/toolchains/github-ubuntu-nvhpc.cmake @@ -24,7 +24,7 @@ set( OpenMP_C_FLAGS "-mp -mp=bind,allcores,numa" CACHE STRING "" ) # NB: We have to add `-mp` again to avoid undefined symbols during linking # (smells like an Nvidia bug) -set( OpenACC_Fortran_FLAGS "-acc=host -mp" CACHE STRING "" ) +set( OpenACC_Fortran_FLAGS "-acc=gpu -mp" CACHE STRING "" ) # Enable this to get more detailed compiler output # set( OpenACC_Fortran_FLAGS "${OpenACC_Fortran_FLAGS} -Minfo" ) From d66b5f57104aafb6fc7a880c6faeec566152cc39 Mon Sep 17 00:00:00 2001 From: Ahmad Nawab Date: Mon, 5 Feb 2024 16:33:17 +0000 Subject: [PATCH 146/174] Don't test CUDA variants built with nvhpc in CI --- .github/workflows/build.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e9ad5729..e3260f6b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,6 +50,11 @@ jobs: build_flags: '--cloudsc-fortran-pyiface=ON --cloudsc-python-f2py=ON' # Add nvhpc build configurations with serialbox and HDF5 + - arch: nvhpc/21.9 + nvhpc_version: 21.9 + io_library_flag: '' + build_flags: '--with-gpu --with-loki --cmake="ENABLE_ACC=OFF"' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda-' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE - arch: nvhpc/21.9 nvhpc_version: 21.9 io_library_flag: '' @@ -66,6 +71,11 @@ jobs: build_flags: '--with-gpu --with-loki --with-cuda --with-atlas' ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-cuda' # GPU variants don't work on CPU runners, loki-c variant causes SIGFPE + - arch: nvhpc/23.5 + nvhpc_version: 23.5 + io_library_flag: '' + build_flags: '--with-gpu --with-loki --cmake="ENABLE_ACC=OFF"' + ctest_exclude_pattern: '-gpu-|-scc-|-loki-c|-loki-sca|-cuda-' # GPU variants don't work on CPU runners, loki-c and loki-sca variant causes SIGFPE - arch: nvhpc/23.5 nvhpc_version: 23.5 io_library_flag: '' @@ -159,7 +169,7 @@ jobs: # Run ctest - name: Run CTest - if: ${{ !( contains(matrix.build_flags, '--single-precision') || (contains(matrix.build_flags, '--with-cuda') && contains(matrix.build_flags, '--with-atlas')) ) }} + if: ${{ !( contains(matrix.build_flags, '--single-precision') || (contains(matrix.build_flags, '--with-cuda') && contains(matrix.arch, 'nvhpc')) ) }} working-directory: ./build run: | source env.sh From 995cadb190fe0a2ed1f137d978cfcd05ad9bc611 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Wed, 7 Feb 2024 08:44:11 +0000 Subject: [PATCH 147/174] beautifying: fix alignment of the printed table for CUDA variants --- src/cloudsc_cuda/cloudsc/cloudsc_driver.cu | 2 +- src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu b/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu index 5ac91de2..96d04519 100644 --- a/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu +++ b/src/cloudsc_cuda/cloudsc/cloudsc_driver.cu @@ -484,7 +484,7 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zmflops = 0.0; zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d TOTAL\n", numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, diff --git a/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu b/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu index 615cdf25..152abb1e 100644 --- a/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu +++ b/src/cloudsc_cuda/cloudsc/cloudsc_driver_hoist.cu @@ -525,7 +525,7 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) { zmflops = 0.0; zthrput = 0.0; } - printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n", + printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d TOTAL\n", numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput); cloudsc_validate(klon, nlev, nclv, numcols, nproma, From b42c36f6b20ef5c4e600b0cd91c3b0fd9f1efce5 Mon Sep 17 00:00:00 2001 From: Slavko Brdar Date: Wed, 7 Feb 2024 16:51:40 +0000 Subject: [PATCH 148/174] 1) docu on CLOUDSC_ATLAS_MULTIFIELD; 2) code cleanup (tnx Balthasar) --- README.md | 3 +++ src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 | 5 ----- src/cloudsc_fortran_atlas/validate_atlas_mod.F90 | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index a217b2dd..929cd7c6 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,9 @@ Balthasar Reuter (balthasar.reuter@ecmwf.int) minor modifications (i.e. derived types/global paramters handling). Turned off by default, activate at the build stage with `--cloudsc-fortran-pyiface=ON`. +- **dwarf-cloudsc-fortran-atlas**: A version of **dwarf-cloudsc-fortran** which uses the [Atlas library](https://github.com/ecmwf/atlas) + and its Field and FieldSet data stuctures. There are two storage settings for variables. If the environment variable + CLOUDSC_ATLAS_MULTIFIELD is "0", "OFF", or "FALSE", the variables are managed as atlas::FieldSet, which is an array of atlas::Fields. For other values of CLOUDSC_ATLAS_MULTIFIELD, a batching of variables is used as (BLK_IDX, LEV, VAR_ID, BLK_ID). ## Download and Installation diff --git a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 index e6f83ff0..175f7b01 100644 --- a/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 +++ b/src/cloudsc_fortran_atlas/cloudsc_global_atlas_state_mod.F90 @@ -132,10 +132,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_BLOCK(SELF, FSET, IBLK) REAL(KIND=JPRB), POINTER :: TMP3D(:,:,:) -! CALL FSET%UPDATE_DEVICE(1,IBLK) -! field = fset%field(1) -! call field%update_device() - CALL FSET%DATA(1, SELF%PLCRIT_AER, IBLK) CALL FSET%DATA(2, SELF%PICRIT_AER, IBLK) CALL FSET%DATA(3, SELF%PRE_ICE, IBLK) @@ -295,7 +291,6 @@ SUBROUTINE CLOUDSC_GLOBAL_ATLAS_STATE_LOAD(SELF, FSET, FSPACE, NPROMA, NGPTOTG) OUT_MFIELD_CONFIG(IVAR) = ATLAS_CONFIG() CALL OUT_MFIELD_CONFIG(IVAR)%SET("name", TRIM(OUT_VAR_NAMES(IVAR))) CALL OUT_MFIELD_CONFIG(IVAR)%SET("levels", SELF%KLEV+1) - ! CALL FSET%ADD(FSPACE%CREATE_FIELD(NAME=TRIM(OUT_VAR_NAMES(IVAR)), KIND=ATLAS_REAL(JPRB))) END DO CALL CONFIG%SET("nlev", 1 + FSPACE%LEVELS()) CALL CONFIG%SET("fields", OUT_MFIELD_CONFIG) diff --git a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 index 9d852c17..58f0ebc3 100644 --- a/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 +++ b/src/cloudsc_fortran_atlas/validate_atlas_mod.F90 @@ -70,7 +70,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) ZMAX_VAL_ERR(2) = 0.0_JPRB ZSUM_ERR_ABS(:) = 0.0_JPRB - !CALL INPUT_INITIALIZE(NAME='reference') IF (FRANK == 2) THEN CALL LOAD_AND_EXPAND(NAME, REF_R2, NLON, NPROMA, NGPTOT, NBLOCKS, NGPTOTG) CALL FIELD%DATA(FIELD_R1) @@ -160,7 +159,6 @@ SUBROUTINE VALIDATEVAR_ATLAS(FSET, FSPACE, NAME, NLON, NGPTOTG, STATE_VAR) PRINT *, "FIELD RANK NOT SUPPORTED" CALL EXIT(1) ENDIF - !CALL INPUT_FINALIZE() CALL CLOUDSC_MPI_REDUCE_MIN(ZMINVAL, 1, 0) CALL CLOUDSC_MPI_REDUCE_MAX(ZMAX_VAL_ERR, 2, 0) From 2959ac776e4ccfbf5324eead97158d390ba55cb4 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 9 Feb 2024 15:00:07 +0000 Subject: [PATCH 149/174] Fixing CMake integration for HDF5 for C variant --- src/cloudsc_c/CMakeLists.txt | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/cloudsc_c/CMakeLists.txt b/src/cloudsc_c/CMakeLists.txt index 45b1e349..a29a9b2a 100644 --- a/src/cloudsc_c/CMakeLists.txt +++ b/src/cloudsc_c/CMakeLists.txt @@ -14,17 +14,9 @@ ecbuild_add_option( FEATURE CLOUDSC_C if( HAVE_CLOUDSC_C ) - message("HDF5 include dirs: ${HDF5_C_INCLUDE_DIRS}") - message("HDF5 lib: ${HDF5_LIBRARIES}") - message("HDF5 lib c: ${HDF5_C_LIBRARIES}") - message("HDF5 library dirs: ${HDF5_C_LIBRARY_DIRS}") - set( CMAKE_C_STANDARD 11 ) set( CMAKE_C_STANDARD_REQUIRED ON ) - # necessary for AC - link_directories(string (REPLACE ";" " " DEST "${HDF5_C_LIBRARY_DIRS}")>) - ecbuild_add_library( TARGET dwarf-cloudsc-c-lib INSTALL_HEADERS LISTED @@ -45,15 +37,11 @@ if( HAVE_CLOUDSC_C ) cloudsc/cloudsc_validate.c cloudsc/mycpu.h cloudsc/mycpu.c - PRIVATE_INCLUDES - # $<${HAVE_HDF5}:${HDF5_C_INCLUDE_DIRS}> # works on LUMI, doesn't work on AC - $<${HAVE_HDF5}:string (REPLACE ";" " " DEST "${HDF5_C_INCLUDE_DIRS}")> # necessar on AC, not on LUMI PUBLIC_INCLUDES $ $ PUBLIC_LIBS - $<${HAVE_HDF5}:hdf5> - # $<${HAVE_HDF5}:string (REPLACE ";" " " DEST "${HDF5_C_LIBRARIES}")> + $<${HAVE_HDF5}:hdf5::hdf5> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> DEFINITIONS From cb7ddd9d58008d84c062fb2ffb43c7ba59bddda2 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 9 Feb 2024 15:02:23 +0000 Subject: [PATCH 150/174] Always include C variant in 'expected_targets', since this variant does not longer depend on serialbox --- .github/scripts/verify-targets.sh | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index 41c8caee..c2c67147 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -8,12 +8,7 @@ exit_code=0 # Build the list of targets # -targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran) - -if [[ "$io_library_flag" == "--with-serialbox" ]] -then - targets+=(dwarf-cloudsc-c) -fi +targets=(dwarf-P-cloudMicrophysics-IFSScheme dwarf-cloudsc-fortran dwarf-cloudsc-c) if [[ "$build_flags" == *"--with-gpu"* ]] then From 84b8fe4ae0c68990e953521335a2645a1c909541 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 9 Feb 2024 15:06:33 +0000 Subject: [PATCH 151/174] C variant load_state via HDF5: removed some leftover commented code lines/snippets --- src/cloudsc_c/cloudsc/load_state.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cloudsc_c/cloudsc/load_state.c b/src/cloudsc_c/cloudsc/load_state.c index 83d108bb..ea3cecc1 100644 --- a/src/cloudsc_c/cloudsc/load_state.c +++ b/src/cloudsc_c/cloudsc/load_state.c @@ -247,8 +247,6 @@ void load_and_expand_1d_int(hid_t file_id, const char *name, int nlon, int nprom herr_t status; status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); status = H5Dclose(dataset_id); - //expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); - //serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 1); expand_1d_int((int *)buffer, field, nlon, nproma, ngptot, nblocks); } @@ -261,8 +259,6 @@ void load_and_expand_2d(hid_t file_id, const char *name, int nlon, int nlev, int herr_t status; status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); status = H5Dclose(dataset_id); - //expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); - //serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 2); expand_2d((double *)buffer, field, nlon, nlev, nproma, ngptot, nblocks); } @@ -275,8 +271,6 @@ void load_and_expand_3d(hid_t file_id, const char *name, int nlon, int nlev, int herr_t status; status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); status = H5Dclose(dataset_id); - //expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); - //expand_2d((double *)buffer, field, nlon, nlev, nproma, ngptot, nblocks); expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); } #endif From ef723a4745ffeafea2ff475a2786e52c36c032b3 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 31 Jan 2024 19:14:47 +0000 Subject: [PATCH 152/174] Remove stack_mod --- src/cloudsc_loki/CMakeLists.txt | 2 -- src/cloudsc_loki/stack_mod.F90 | 21 --------------------- 2 files changed, 23 deletions(-) delete mode 100644 src/cloudsc_loki/stack_mod.F90 diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index af226e76..8d8a9697 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -158,7 +158,6 @@ if( HAVE_CLOUDSC_LOKI ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-idem-stack SOURCES dwarf_cloudsc.F90 - stack_mod.F90 loki-idem-stack/cloudsc_driver_loki_mod.idem_stack.F90 loki-idem-stack/cloudsc.idem_stack.F90 LIBS @@ -471,7 +470,6 @@ if( HAVE_CLOUDSC_LOKI ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-stack SOURCES dwarf_cloudsc.F90 - stack_mod.F90 loki-scc-stack/cloudsc_driver_loki_mod.scc_stack.F90 loki-scc-stack/cloudsc.scc_stack.F90 LIBS diff --git a/src/cloudsc_loki/stack_mod.F90 b/src/cloudsc_loki/stack_mod.F90 deleted file mode 100644 index 8d076801..00000000 --- a/src/cloudsc_loki/stack_mod.F90 +++ /dev/null @@ -1,21 +0,0 @@ -! (C) Copyright 1988- ECMWF. -! -! This software is licensed under the terms of the Apache Licence Version 2.0 -! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -! -! In applying this licence, ECMWF does not waive the privileges and immunities -! granted to it by virtue of its status as an intergovernmental organisation -! nor does it submit to any jurisdiction. - -MODULE STACK_MOD - -IMPLICIT NONE - -TYPE STACK - INTEGER*8 :: L, U -END TYPE - -PRIVATE -PUBLIC :: STACK - -END MODULE From 0efd082b9c087b4ae31c077a803305699ac7fc43 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 31 Jan 2024 19:27:15 +0000 Subject: [PATCH 153/174] Replace deprecated loki_transform_convert macro and add config for new scheduler --- src/cloudsc_loki/CMakeLists.txt | 265 +++++++++++++++++++-------- src/cloudsc_loki/cloudsc_loki.config | 8 +- 2 files changed, 189 insertions(+), 84 deletions(-) diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index 8d8a9697..94cfe212 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -80,16 +80,28 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-idem ) - loki_transform_convert( - MODE idem FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + OUTPUT + loki-idem/cloudsc.idem.F90 + loki-idem/cloudsc_driver_loki_mod.idem.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE idem CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 - INCLUDES ${COMMON_INCLUDE} - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem - OUTPUT loki-idem/cloudsc.idem.F90 loki-idem/cloudsc_driver_loki_mod.idem.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + CPP + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-idem @@ -143,16 +155,28 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-idem-stack ) - loki_transform_convert( - MODE idem-stack FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + OUTPUT + loki-idem-stack/cloudsc.idem_stack.F90 + loki-idem-stack/cloudsc_driver_loki_mod.idem_stack.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-idem-stack + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE idem-stack CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 - INCLUDES ${COMMON_INCLUDE} - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-idem-stack - OUTPUT loki-idem-stack/cloudsc.idem_stack.F90 loki-idem-stack/cloudsc_driver_loki_mod.idem_stack.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + CPP + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-idem-stack @@ -213,16 +237,28 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-sca ) - loki_transform_convert( - MODE sca FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + OUTPUT + loki-sca/cloudsc.sca.F90 + loki-sca/cloudsc_driver_loki_mod.sca.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-sca + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE sca CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 - INCLUDES ${COMMON_INCLUDE} - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-sca - OUTPUT loki-sca/cloudsc.sca.F90 loki-sca/cloudsc_driver_loki_mod.sca.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + CPP + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-sca @@ -261,16 +297,28 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-claw-cpu ) - loki_transform_convert( - MODE claw FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + OUTPUT + loki-claw-cpu/cloudsc.claw.F90 + loki-claw-cpu/cloudsc_driver_loki_mod.claw.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-claw-cpu + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE claw CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 - INCLUDES ${COMMON_INCLUDE} - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-claw-cpu - OUTPUT loki-claw-cpu/cloudsc.claw.F90 loki-claw-cpu/cloudsc_driver_loki_mod.claw.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + CPP + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) # We purposefully suppress CLAWs insertion of OpenMP loops, @@ -349,18 +397,34 @@ if( HAVE_CLOUDSC_LOKI ) # Uses Loki-frontend CPP to switch to statement function variant again, # but suppresses inlining of stmt funcs by omitting `--include` - loki_transform_convert( - MODE claw FRONTEND ${LOKI_FRONTEND} CPP + + loki_transform( + COMMAND convert + OUTPUT + loki-claw-gpu/cloudsc.claw.F90 + loki-claw-gpu/cloudsc_driver_loki_mod.claw.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-claw-gpu + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE claw CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC} - DATA_OFFLOAD REMOVE_OPENMP - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-claw-gpu - OUTPUT loki-claw-gpu/cloudsc.claw.F90 loki-claw-gpu/cloudsc_driver_loki_mod.claw.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + CPP + DATA_OFFLOAD + REMOVE_OPENMP + DEFINITIONS + CLOUDSC_GPU_TIMING + ${CLOUDSC_DEFINE_STMT_FUNC} + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) claw_compile( @@ -409,18 +473,33 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-scc ) - loki_transform_convert( - MODE scc FRONTEND ${LOKI_FRONTEND} CPP + loki_transform( + COMMAND convert + OUTPUT + loki-scc/cloudsc.scc.F90 + loki-scc/cloudsc_driver_loki_mod.scc.F90 + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE scc CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC} - DATA_OFFLOAD REMOVE_OPENMP - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc - OUTPUT loki-scc/cloudsc.scc.F90 loki-scc/cloudsc_driver_loki_mod.scc.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + CPP + DATA_OFFLOAD + REMOVE_OPENMP + DEFINITIONS + CLOUDSC_GPU_TIMING + ${CLOUDSC_DEFINE_STMT_FUNC} + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc @@ -451,20 +530,33 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-scc-stack ) - loki_transform_convert( - MODE scc-stack FRONTEND ${LOKI_FRONTEND} CPP - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC} - DATA_OFFLOAD REMOVE_OPENMP - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-stack + loki_transform( + COMMAND convert OUTPUT loki-scc-stack/cloudsc.scc_stack.F90 loki-scc-stack/cloudsc_driver_loki_mod.scc_stack.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-stack + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE scc-stack + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + CPP + DATA_OFFLOAD + REMOVE_OPENMP + DEFINITIONS + CLOUDSC_GPU_TIMING + ${CLOUDSC_DEFINE_STMT_FUNC} + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-stack @@ -501,20 +593,33 @@ if( HAVE_CLOUDSC_LOKI ) cloudsc_xmod( loki-scc-hoist ) - loki_transform_convert( - MODE scc-hoist FRONTEND ${LOKI_FRONTEND} CPP - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - PATH ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS ${COMMON_MODULE}/yomphyder.F90 ${COMMON_MODULE}/yoecldp.F90 - INCLUDES ${COMMON_INCLUDE} - DEFINITIONS CLOUDSC_GPU_TIMING ${CLOUDSC_DEFINE_STMT_FUNC} - DATA_OFFLOAD REMOVE_OPENMP - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + loki_transform( + COMMAND convert OUTPUT loki-scc-hoist/cloudsc.scc_hoist.F90 loki-scc-hoist/cloudsc_driver_loki_mod.scc_hoist.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-hoist + DEPENDS + cloudsc.F90 + cloudsc_driver_loki_mod.F90 + ${_OMNI_DEPENDENCIES} + MODE scc-hoist + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + CPP + DATA_OFFLOAD + REMOVE_OPENMP + DEFINITIONS + CLOUDSC_GPU_TIMING + ${CLOUDSC_DEFINE_STMT_FUNC} + FRONTEND ${LOKI_FRONTEND} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR} + ${COMMON_MODULE} + INCLUDES + ${COMMON_INCLUDE} + XMOD + ${_TARGET_XMOD_DIR} + ${XMOD_DIR} ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-hoist diff --git a/src/cloudsc_loki/cloudsc_loki.config b/src/cloudsc_loki/cloudsc_loki.config index 6eab8577..d85e2096 100644 --- a/src/cloudsc_loki/cloudsc_loki.config +++ b/src/cloudsc_loki/cloudsc_loki.config @@ -6,10 +6,10 @@ strict = true # Throw exceptions during dicovery # Ensure that we are never adding these to the tree, and thus # do not attempt to look up the source files for these. -# TODO: Add type-bound procedure support and adjust scheduler to it -disable = ['performance_timer%start', 'performance_timer%end', 'performance_timer%thread_start', - 'performance_timer%thread_end', 'performance_timer%thread_log', - 'performance_timer%thread_log', 'performance_timer%print_performance'] +disable = ['timer_mod', 'abort', 'file_io_mod', 'foe*', 'fokoop'] + +# Prune the tree for these to ensure they are not processed by transformations +block = ['parkind1', 'yomphyder', 'yoecldp', 'fc*_mod'] # Define entry point for call-tree transformation [routines] From 4beb57df98ac9f050e2c33ae1e1df01040c49d2d Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 7 Feb 2024 12:59:01 +0000 Subject: [PATCH 154/174] Remove xmod generation mechanism --- src/cloudsc_loki/CMakeLists.txt | 111 +- src/cloudsc_loki/xmod/file_io_mod.xmod | 836 ++++++++++ src/cloudsc_loki/xmod/yoecldp.xmod | 2003 ++++-------------------- src/cloudsc_loki/xmod/yoethf.xmod | 726 ++++++++- src/cloudsc_loki/xmod/yomcst.xmod | 1467 +++++++++++++---- 5 files changed, 2969 insertions(+), 2174 deletions(-) create mode 100644 src/cloudsc_loki/xmod/file_io_mod.xmod diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index 94cfe212..94f339e2 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -18,43 +18,6 @@ ecbuild_add_option( FEATURE CLOUDSC_LOKI_CLAW CONDITION HAVE_CLOUDSC_LOKI ) -function( cloudsc_xmod _TARGET ) - - if( TARGET clawfc AND ${LOKI_FRONTEND} STREQUAL "omni" ) - - # Ugly hack: OMNI needs the xmod-file for cloudsc.F90 to be able to - # parse the driver file successfully. However, the scheduler currently - # doesn't take this into account and fails when parsing driver before - # kernel file. - # (Note: the problem vanishes in serial builds as there the C-transpile - # target is built first which doesn't use the scheduler and therefore - # creates the necessary xmod files for us) - # TODO: This can be removed once the scheduler is aware of these dependencies - # and parses files in the right order - - set( _TARGET_XMOD_DIR "${CMAKE_CURRENT_BINARY_DIR}/${_TARGET}" ) - set( _TARGET_XMOD_DIR ${_TARGET_XMOD_DIR} PARENT_SCOPE ) - file( MAKE_DIRECTORY ${_TARGET_XMOD_DIR} ) - - generate_xmod( - OUTPUT ${_TARGET_XMOD_DIR}/cloudsc.xml - SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc.F90 - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - ) - - # Use XML files as dependencies (not xmod) as they are updated by later calls of - # F_Front (and thus would trigger new execution rounds) - set( _OMNI_DEPENDENCIES ${_TARGET_XMOD_DIR}/cloudsc.xml PARENT_SCOPE ) - - else() - - set( _TARGET_XMOD_DIR "" PARENT_SCOPE) - set( _OMNI_DEPENDENCIES "" PARENT_SCOPE ) - - endif() - -endfunction() - if( HAVE_CLOUDSC_LOKI ) #################################################### @@ -78,8 +41,6 @@ if( HAVE_CLOUDSC_LOKI ) ## * Internal "do-nothing" mode for Loki debug ## #################################################### - cloudsc_xmod( loki-idem ) - loki_transform( COMMAND convert OUTPUT @@ -153,8 +114,6 @@ if( HAVE_CLOUDSC_LOKI ) ## * Internal "do-nothing" mode for Loki debug ## ############################################################ - cloudsc_xmod( loki-idem-stack ) - loki_transform( COMMAND convert OUTPUT @@ -235,8 +194,6 @@ if( HAVE_CLOUDSC_LOKI ) ## * Extract de-vectorized SCA format code ## #################################################### - cloudsc_xmod( loki-sca ) - loki_transform( COMMAND convert OUTPUT @@ -295,8 +252,6 @@ if( HAVE_CLOUDSC_LOKI ) #################################################### if( HAVE_CLOUDSC_LOKI_CLAW AND TARGET clawfc ) - cloudsc_xmod( loki-claw-cpu ) - loki_transform( COMMAND convert OUTPUT @@ -393,8 +348,6 @@ if( HAVE_CLOUDSC_LOKI ) #################################################### if( HAVE_CLOUDSC_LOKI_CLAW AND TARGET clawfc ) - cloudsc_xmod( loki-claw-gpu ) - # Uses Loki-frontend CPP to switch to statement function variant again, # but suppresses inlining of stmt funcs by omitting `--include` @@ -471,8 +424,6 @@ if( HAVE_CLOUDSC_LOKI ) ## * Invokes compute kernel as `!$acc vector` ## #################################################### - cloudsc_xmod( loki-scc ) - loki_transform( COMMAND convert OUTPUT @@ -528,8 +479,6 @@ if( HAVE_CLOUDSC_LOKI ) ## * Allocates temporaries using pool allocator ## ###################################################### - cloudsc_xmod( loki-scc-stack ) - loki_transform( COMMAND convert OUTPUT @@ -591,8 +540,6 @@ if( HAVE_CLOUDSC_LOKI ) ## * Temporary arrays hoisted to driver ## #################################################### - cloudsc_xmod( loki-scc-hoist ) - loki_transform( COMMAND convert OUTPUT @@ -655,8 +602,6 @@ if( HAVE_CLOUDSC_LOKI ) if( HAVE_CUDA ) # scc-cuf-parametrise - cloudsc_xmod( loki-scc-cuf-parametrise ) - loki_transform_convert( MODE cuf-parametrise FRONTEND ${LOKI_FRONTEND} CPP CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_cuf_loki.config @@ -674,7 +619,7 @@ if( HAVE_CUDA ) OUTPUT loki-scc-cuf-parametrise/cuf_cloudsc_driver_loki_mod.cuf_parametrise.F90 loki-scc-cuf-parametrise/cuf_cloudsc.cuf_parametrise.F90 - DEPENDS cuf_cloudsc.F90 cuf_cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + DEPENDS cuf_cloudsc.F90 cuf_cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} ) set_source_files_properties( @@ -704,8 +649,6 @@ if( HAVE_CUDA ) ) # scc-cuf-hoist - cloudsc_xmod( loki-scc-cuf-hoist ) - loki_transform_convert( MODE cuf-hoist FRONTEND ${LOKI_FRONTEND} CPP CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_cuf_loki.config @@ -723,13 +666,13 @@ if( HAVE_CUDA ) OUTPUT loki-scc-cuf-hoist/cuf_cloudsc_driver_loki_mod.cuf_hoist.F90 loki-scc-cuf-hoist/cuf_cloudsc.cuf_hoist.F90 - DEPENDS cuf_cloudsc.F90 cuf_cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + DEPENDS cuf_cloudsc.F90 cuf_cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} ) set_source_files_properties( loki-scc-cuf-hoist/cuf_cloudsc_driver_loki_mod.cuf_hoist.F90 - loki-scc-cuf-hoist/cuf_cloudsc.cuf_hoist.F90 - PROPERTIES COMPILE_FLAGS "-cuda -gpu=maxregcount:128" + loki-scc-cuf-hoist/cuf_cloudsc.cuf_hoist.F90 + PROPERTIES COMPILE_FLAGS "-cuda -gpu=maxregcount:128" ) ecbuild_add_executable( TARGET dwarf-cloudsc-loki-scc-cuf-hoist @@ -760,30 +703,28 @@ endif() if ( NOT HAVE_SINGLE_PRECISION ) - cloudsc_xmod( loki-c ) - - loki_transform( - COMMAND convert - MODE c FRONTEND ${LOKI_FRONTEND} CPP - CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config - SOURCES ${CMAKE_CURRENT_SOURCE_DIR} - HEADERS - ${COMMON_MODULE}/parkind1.F90 - ${COMMON_MODULE}/yomphyder.F90 - ${COMMON_MODULE}/yomcst.F90 - ${COMMON_MODULE}/yoethf.F90 - ${COMMON_MODULE}/yoecldp.F90 - ${COMMON_MODULE}/fcttre_mod.F90 - ${COMMON_MODULE}/fccld_mod.F90 - INCLUDES ${COMMON_INCLUDE} - XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} - BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-c - OUTPUT - loki-c/cloudsc_driver_loki_mod.c.F90 - loki-c/cloudsc_fc.F90 loki-c/cloudsc_c.c - loki-c/yoethf_fc.F90 loki-c/yomcst_fc.F90 - loki-c/yoecldp_fc.F90 - DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} + loki_transform( + COMMAND convert + MODE c FRONTEND ${LOKI_FRONTEND} CPP + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config + SOURCES ${CMAKE_CURRENT_SOURCE_DIR} + HEADERS + ${COMMON_MODULE}/parkind1.F90 + ${COMMON_MODULE}/yomphyder.F90 + ${COMMON_MODULE}/yomcst.F90 + ${COMMON_MODULE}/yoethf.F90 + ${COMMON_MODULE}/yoecldp.F90 + ${COMMON_MODULE}/fcttre_mod.F90 + ${COMMON_MODULE}/fccld_mod.F90 + INCLUDES ${COMMON_INCLUDE} + XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR} + BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-c + OUTPUT + loki-c/cloudsc_driver_loki_mod.c.F90 + loki-c/cloudsc_fc.F90 loki-c/cloudsc_c.c + loki-c/yoethf_fc.F90 loki-c/yomcst_fc.F90 + loki-c/yoecldp_fc.F90 + DEPENDS cloudsc.F90 cloudsc_driver_loki_mod.F90 ${_OMNI_DEPENDENCIES} ) # Define the CLAW-CPU build target for this variant diff --git a/src/cloudsc_loki/xmod/file_io_mod.xmod b/src/cloudsc_loki/xmod/file_io_mod.xmod new file mode 100644 index 00000000..0999ba61 --- /dev/null +++ b/src/cloudsc_loki/xmod/file_io_mod.xmod @@ -0,0 +1,836 @@ + + file_io_mod + + + + + + + + + + + + + + + + + name + variable + + + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + name + variable + + + + + + + + + + 4 + + + + + + name + variable + + + + + + + + + + + 4 + + + + + + name + start + end + size + nlon + buffer + + + + + + + + + + 4 + + + + + + + + + + + 1 + + + size + + + + + + + + + + + + name + start + end + size + nlon + buffer + + + + + + + + + + 4 + + + + + + + + + 4 + + + + + + + 1 + + + size + + + + + + 4 + + + + + + + + + + name + start + end + size + nlon + buffer + + + + + + + + + + 4 + + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + 1 + + + size + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + + + + name + start + end + size + nlon + nlev + buffer + + + + + + + + + + 4 + + + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + 1 + + + size + + + + + + + 1 + + + size + + + + + 1 + + + nlev + + + + + + 4 + + + + + + + 1 + + + 2 + + + + + + + + 1 + + + 2 + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + + + + + + + + + + name + start + end + size + nlon + nlev + ndim + buffer + + + + + + + + + + 4 + + + + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + 1 + + + size + + + + + + + 1 + + + size + + + + + 1 + + + nlev + + + + + + + 1 + + + size + + + + + 1 + + + nlev + + + + + 1 + + + ndim + + + + + + 4 + + + + + + + 1 + + + 3 + + + + + + + + 1 + + + 3 + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + + + + + + + + + + + + + + + + + + + name + + + + + + + + + + + + + + name + variable + + + + + name + variable + + + + + name + variable + + + + + name + start + end + size + nlon + buffer + + + + + name + start + end + size + nlon + nlev + buffer + + + + + name + start + end + size + nlon + nlev + ndim + buffer + + + + + name + start + end + size + nlon + buffer + + + + + name + start + end + size + nlon + buffer + + + + + + jpit + + + jpis + + + jpim + + + jpib + + + jpia + + + jprt + + + jprs + + + jprm + + + jprb + + + jprd + + + load_scalar + + + load_array + + + input_initialize + + + input_finalize + + + load_scalar_real + + + load_scalar_int + + + load_scalar_log + + + load_array_i1 + + + load_array_l1 + + + load_array_r1 + + + load_array_r2 + + + load_array_r3 + + + + + jpit + + + selected_int_kind + + 2 + + + + + + jpis + + + selected_int_kind + + 4 + + + + + + jpim + + + selected_int_kind + + 9 + + + + + + jpib + + + selected_int_kind + + 12 + + + + + + jpia + + + selected_int_kind + + 9 + + + + + + jprt + + + selected_real_kind + + 2 + 1 + + + + + + jprs + + + selected_real_kind + + 4 + 2 + + + + + + jprm + + + selected_real_kind + + 6 + 37 + + + + + + jprb + + + selected_real_kind + + 13 + 300 + + + + + + jprd + + + selected_real_kind + + 13 + 300 + + + + + + + + + load_scalar_real + load_scalar_log + load_scalar_int + + + load_scalar_real + load_scalar_log + load_scalar_int + + + load_scalar_real + load_scalar_log + load_scalar_int + + + + + load_array_r1 + load_array_r2 + load_array_r3 + load_array_l1 + load_array_i1 + + + load_array_r1 + load_array_r2 + load_array_r3 + load_array_l1 + load_array_i1 + + + load_array_r1 + load_array_r2 + load_array_r3 + load_array_l1 + load_array_i1 + + + load_array_r1 + load_array_r2 + load_array_r3 + load_array_l1 + load_array_i1 + + + load_array_r1 + load_array_r2 + load_array_r3 + load_array_l1 + load_array_i1 + + + + + + diff --git a/src/cloudsc_loki/xmod/yoecldp.xmod b/src/cloudsc_loki/xmod/yoecldp.xmod index a617954d..9946c976 100644 --- a/src/cloudsc_loki/xmod/yoecldp.xmod +++ b/src/cloudsc_loki/xmod/yoecldp.xmod @@ -2,1491 +2,24 @@ yoecldp parkind1 + file_io_mod - - - - - 4 - - - - - - 4 - - - - - - 4 - - - - - - 4 - - - - - - 4 - - - - - - 4 - - - - - - - ramid - - - rcldiff - - - rcldiff_convi - - - rclcrit - - - rclcrit_sea - - - rclcrit_land - - - rkconv - - - rprc1 - - - rprc2 - - - rcldmax - - - rpecons - - - rvrfactor - - - rprecrhmax - - - rtaumel - - - ramin - - - rlmin - - - rkooptau - - - rcldtopp - - - rlcritsnow - - - rsnowlin1 - - - rsnowlin2 - - - ricehi1 - - - ricehi2 - - - riceinit - - - rvice - - - rvrain - - - rvsnow - - - rthomo - - - rcovpmin - - - rccn - - - rnice - - - rccnom - - - rccnss - - - rccnsu - - - rcldtopcf - - - rdepliqrefrate - - - rdepliqrefdepth - - - rcl_kkaac - - - rcl_kkbac - - - rcl_kkaau - - - rcl_kkbauq - - - rcl_kkbaun - - - rcl_kk_cloud_num_sea - - - rcl_kk_cloud_num_land - - - rcl_ai - - - rcl_bi - - - rcl_ci - - - rcl_di - - - rcl_x1i - - - rcl_x2i - - - rcl_x3i - - - rcl_x4i - - - rcl_const1i - - - rcl_const2i - - - rcl_const3i - - - rcl_const4i - - - rcl_const5i - - - rcl_const6i - - - rcl_apb1 - - - rcl_apb2 - - - rcl_apb3 - - - rcl_as - - - rcl_bs - - - rcl_cs - - - rcl_ds - - - rcl_x1s - - - rcl_x2s - - - rcl_x3s - - - rcl_x4s - - - rcl_const1s - - - rcl_const2s - - - rcl_const3s - - - rcl_const4s - - - rcl_const5s - - - rcl_const6s - - - rcl_const7s - - - rcl_const8s - - - rdenswat - - - rdensref - - - rcl_ar - - - rcl_br - - - rcl_cr - - - rcl_dr - - - rcl_x1r - - - rcl_x2r - - - rcl_x4r - - - rcl_ka273 - - - rcl_cdenom1 - - - rcl_cdenom2 - - - rcl_cdenom3 - - - rcl_schmidt - - - rcl_dynvisc - - - rcl_const1r - - - rcl_const2r - - - rcl_const3r - - - rcl_const4r - - - rcl_fac1 - - - rcl_fac2 - - - rcl_const5r - - - rcl_const6r - - - rcl_fzrab - - - rcl_fzrbb - - - lcldextra - - - lcldbudget - - - nssopt - - - ncldtop - - - naeclbc - - - naecldu - - - naeclom - - - naeclss - - - naeclsu - - - nclddiag - - - naercld - - - laerliqautolsp - - - laerliqautocp - - - laerliqautocpb - - - laerliqcoll - - - laericesed - - - laericeauto - - - nshapep - - - nshapeq - - - nbeta - - - rbeta - - - rbetap1 - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - - - - - - - selected_real_kind - - 13 - 300 - - - + + + + + + name + variable + + + + + - - + + selected_real_kind @@ -1497,56 +30,62 @@ - - - - - selected_real_kind - - 13 - 300 - - - + + + + name + variable + + + + + - - - - - selected_real_kind - - 13 - 300 - - - + + + + + name + variable + + + + + - - + + - - selected_real_kind - - 13 - 300 - - + 4 - - + + + + + name + start + end + size + nlon + buffer + + + + + + + + - - selected_real_kind - - 13 - 300 - - + 4 - - + + + + + selected_real_kind @@ -1557,32 +96,45 @@ - - - - - selected_real_kind - - 13 - 300 - - - + + + + + + 1 + + + size + + - - + + + name + start + end + size + nlon + nlev + buffer + + + + + + + + - - selected_real_kind - - 13 - 300 - - + 4 - - + + + + + + selected_real_kind @@ -1593,44 +145,65 @@ - - - - - selected_real_kind - - 13 - 300 - - - + + + + + + 1 + + + size + + - - - - - selected_real_kind - - 13 - 300 - - - + + + + 1 + + + size + + + + + 1 + + + nlev + + - - + + + name + start + end + size + nlon + nlev + ndim + buffer + + + + + + + + - - selected_real_kind - - 13 - 300 - - + 4 - - + + + + + + + selected_real_kind @@ -1641,159 +214,157 @@ - - - - 4 - - - - - - 4 - - - - - - 4 - - - - - - - - - - 4 - - - - - - 4 - + + + + + + 1 + + + size + + - - - - - selected_real_kind - - 13 - 300 - - - + + + + 1 + + + size + + + + + 1 + + + nlev + + - - - - - selected_real_kind - - 13 - 300 - - - + + + + 1 + + + size + + + + + 1 + + + nlev + + + + + 1 + + + ndim + + - - + + + name + start + end + size + nlon + buffer + + + + + + + + 4 - - - - - selected_real_kind - - 13 - 300 - - - - - - + + + + + + + - 0 + 1 - 100 + size - + + + name + start + end + size + nlon + buffer + + + + + + + + - - selected_real_kind - - 13 - 300 - - + 4 + + + + + + + + + 4 - - + + + - 0 + 1 - 100 + size - - - - - - + jpim - + jprb - - nclv - - - ncldql - - - ncldqi + + load_scalar - - ncldqr - - - ncldqs - - - ncldqv - - - tecldp - - - selected_real_kind - - - yrecldp + + load_array - jpim + jpim selected_int_kind @@ -1804,7 +375,7 @@ - jprb + jprb selected_real_kind @@ -1815,42 +386,6 @@ - - nclv - - 5 - - - - ncldql - - 1 - - - - ncldqi - - 2 - - - - ncldqr - - 3 - - - - ncldqs - - 4 - - - - ncldqv - - 5 - - diff --git a/src/cloudsc_loki/xmod/yoethf.xmod b/src/cloudsc_loki/xmod/yoethf.xmod index 4c5aa809..2352595e 100644 --- a/src/cloudsc_loki/xmod/yoethf.xmod +++ b/src/cloudsc_loki/xmod/yoethf.xmod @@ -2,13 +2,271 @@ yoethf parkind1 + file_io_mod - - - - - + + + + + + name + variable + + + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + name + variable + + + + + + + + + + + name + variable + + + + + + + + + + 4 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + selected_real_kind @@ -19,8 +277,8 @@ - - + + selected_real_kind @@ -31,8 +289,8 @@ - - + + selected_real_kind @@ -43,8 +301,8 @@ - - + + selected_real_kind @@ -55,8 +313,8 @@ - - + + selected_real_kind @@ -67,8 +325,8 @@ - - + + selected_real_kind @@ -79,8 +337,81 @@ - - + + + + + r2es + + + r3les + + + r3ies + + + r4les + + + r4ies + + + r5les + + + r5ies + + + rvtmp2 + + + rhoh2o + + + r5alvcp + + + r5alscp + + + ralvdcp + + + ralsdcp + + + ralfdcp + + + rtwat + + + rtber + + + rtbercu + + + rtice + + + rticecu + + + rtwat_rtice_r + + + rtwat_rticecu_r + + + rkoop1 + + + rkoop2 + + + + selected_real_kind @@ -91,8 +422,8 @@ - - + + selected_real_kind @@ -103,8 +434,8 @@ - - + + selected_real_kind @@ -115,8 +446,8 @@ - - + + selected_real_kind @@ -127,8 +458,8 @@ - - + + selected_real_kind @@ -139,8 +470,8 @@ - - + + selected_real_kind @@ -151,8 +482,8 @@ - - + + selected_real_kind @@ -163,8 +494,8 @@ - - + + selected_real_kind @@ -175,8 +506,8 @@ - - + + selected_real_kind @@ -187,8 +518,8 @@ - - + + selected_real_kind @@ -199,8 +530,8 @@ - - + + selected_real_kind @@ -211,8 +542,8 @@ - - + + selected_real_kind @@ -223,8 +554,8 @@ - - + + selected_real_kind @@ -235,8 +566,8 @@ - - + + selected_real_kind @@ -247,8 +578,8 @@ - - + + selected_real_kind @@ -259,8 +590,8 @@ - - + + selected_real_kind @@ -271,8 +602,8 @@ - - + + selected_real_kind @@ -283,91 +614,304 @@ - + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + + + + + + + + + + + 4 + + + + + 5 + + + + + 5 + + + + + 5 + + + + + 5 + + + + + 5 + + + + + 5 + + + + + 7 + + + + + 7 + + + + + 7 + + + + + 7 + + + + + 7 + + + + + 5 + + + + + 5 + + + + + 7 + + + + + 13 + + + + + 15 + + + + + 6 + + + + + 6 + + + + + + + + + + + + + + + + + + + + + + - + jpim - + jprb - + + load_scalar + + selected_real_kind - + r2es - + r3les - + r3ies - + r4les - + r4ies - + r5les - + r5ies - + rvtmp2 - + rhoh2o - + r5alvcp - + r5alscp - + ralvdcp - + ralsdcp - + ralfdcp - + rtwat - + rtber - + rtbercu - + rtice - + rticecu - + rtwat_rtice_r - + rtwat_rticecu_r - + rkoop1 - + rkoop2 + + toethf + + + yrthf + + + yoethf_load_parameters + + + yrthf_copy_parameters + - jpim + jpim selected_int_kind @@ -378,7 +922,7 @@ - jprb + jprb selected_real_kind @@ -393,5 +937,31 @@ + ! J.-J. MORCRETTE 91/07/14 ADAPTED TO I.F.S. + ! NAME TYPE PURPOSE + ! ---- ---- ------- + ! *R__ES* REAL *CONSTANTS USED FOR COMPUTATION OF SATURATION + ! MIXING RATIO OVER LIQUID WATER(*R_LES*) OR + ! ICE(*R_IES*). + ! *RVTMP2* REAL *RVTMP2=RCPV/RCPD-1. + ! *RHOH2O* REAL *DENSITY OF LIQUID WATER. (RATM/100.) + ! *R5ALVCP* REAL *R5LES*RLVTT/RCPD + ! *R5ALSCP* REAL *R5IES*RLSTT/RCPD + ! *RALVDCP* REAL *RLVTT/RCPD + ! *RALSDCP* REAL *RLSTT/RCPD + ! *RALFDCP* REAL *RLMLT/RCPD + ! *RTWAT* REAL *RTWAT=RTT + ! *RTBER* REAL *RTBER=RTT-0.05 + ! *RTBERCU REAL *RTBERCU=RTT-5.0 + ! *RTICE* REAL *RTICE=RTT-0.1 + ! *RTICECU* REAL *RTICECU=RTT-23.0 + ! *RKOOP? REAL *CONSTANTS TO DESCRIBE KOOP FORM FOR NUCLEATION + ! *RTWAT_RTICE_R* REAL *RTWAT_RTICE_R=1./(RTWAT-RTICE) + ! *RTWAT_RTICECU_R* REAL *RTWAT_RTICECU_R=1./(RTWAT-RTICECU) + ACC declare copyin(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies, r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu, rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2) + OMP declare target(r2es, r3les, r3ies, r4les, r4ies, r5les, r5ies) + OMP declare target( r5alvcp, r5alscp, ralvdcp, ralsdcp, ralfdcp, rtwat, rtice, rticecu) + OMP declare target( rtwat_rtice_r, rtwat_rticecu_r, rkoop1, rkoop2) + ! ---------------------------------------------------------------- diff --git a/src/cloudsc_loki/xmod/yomcst.xmod b/src/cloudsc_loki/xmod/yomcst.xmod index 72783c2b..b4a3fe1e 100644 --- a/src/cloudsc_loki/xmod/yomcst.xmod +++ b/src/cloudsc_loki/xmod/yomcst.xmod @@ -2,12 +2,967 @@ yomcst parkind1 + file_io_mod - - - - + + + + + name + variable + + + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + name + variable + + + + + + + + + + + name + variable + + + + + + + + + + 4 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + rpi + + + rclum + + + rhpla + + + rkbol + + + rnavo + + + rday + + + rdayi + + + rhour + + + rea + + + repsm + + + rsiyea + + + rsiday + + + romega + + + ra + + + rg + + + r1sa + + + rsigma + + + ri0 + + + r + + + rmd + + + rmv + + + rmo3 + + + rd + + + rv + + + rcpd + + + rcpv + + + rcvd + + + rcvv + + + rkappa + + + retv + + + rmco2 + + + rmch4 + + + rmn2o + + + rmco + + + rmhcho + + + rmno2 + + + rmso2 + + + rmso4 + + + rcw + + + rcs + + + ratm + + + rtt + + + rlvtt + + + rlstt + + + rlvzer + + + rlszer + + + rlmlt + + + rdt + + + restt + + + rgamw + + + rbetw + + + ralpw + + + rgams + + + rbets + + + ralps + + + ralpd + + + rbetd + + + rgamd + + + rsnan + + + + + + + selected_real_kind + + 13 + 300 + + + + + + selected_real_kind @@ -18,8 +973,8 @@ - - + + selected_real_kind @@ -30,8 +985,8 @@ - - + + selected_real_kind @@ -42,8 +997,8 @@ - - + + selected_real_kind @@ -54,8 +1009,8 @@ - - + + selected_real_kind @@ -66,8 +1021,8 @@ - - + + selected_real_kind @@ -78,8 +1033,8 @@ - - + + selected_real_kind @@ -90,8 +1045,8 @@ - - + + selected_real_kind @@ -102,8 +1057,8 @@ - - + + selected_real_kind @@ -114,8 +1069,8 @@ - - + + selected_real_kind @@ -126,8 +1081,8 @@ - - + + selected_real_kind @@ -138,8 +1093,8 @@ - - + + selected_real_kind @@ -150,8 +1105,8 @@ - - + + selected_real_kind @@ -162,8 +1117,8 @@ - - + + selected_real_kind @@ -174,8 +1129,8 @@ - - + + selected_real_kind @@ -186,8 +1141,8 @@ - - + + selected_real_kind @@ -198,8 +1153,8 @@ - - + + selected_real_kind @@ -210,8 +1165,8 @@ - - + + selected_real_kind @@ -222,8 +1177,8 @@ - - + + selected_real_kind @@ -234,8 +1189,8 @@ - - + + selected_real_kind @@ -246,8 +1201,8 @@ - - + + selected_real_kind @@ -258,8 +1213,8 @@ - - + + selected_real_kind @@ -270,8 +1225,8 @@ - - + + selected_real_kind @@ -282,8 +1237,8 @@ - - + + selected_real_kind @@ -294,8 +1249,8 @@ - - + + selected_real_kind @@ -306,8 +1261,8 @@ - - + + selected_real_kind @@ -318,8 +1273,8 @@ - - + + selected_real_kind @@ -330,8 +1285,8 @@ - - + + selected_real_kind @@ -342,8 +1297,8 @@ - - + + selected_real_kind @@ -354,8 +1309,8 @@ - - + + selected_real_kind @@ -366,8 +1321,8 @@ - - + + selected_real_kind @@ -378,8 +1333,8 @@ - - + + selected_real_kind @@ -390,8 +1345,8 @@ - - + + selected_real_kind @@ -402,8 +1357,8 @@ - - + + selected_real_kind @@ -414,8 +1369,8 @@ - - + + selected_real_kind @@ -426,8 +1381,8 @@ - - + + selected_real_kind @@ -438,8 +1393,8 @@ - - + + selected_real_kind @@ -450,8 +1405,8 @@ - - + + selected_real_kind @@ -462,8 +1417,8 @@ - - + + selected_real_kind @@ -474,8 +1429,8 @@ - - + + selected_real_kind @@ -486,8 +1441,8 @@ - - + + selected_real_kind @@ -498,8 +1453,8 @@ - - + + selected_real_kind @@ -510,8 +1465,8 @@ - - + + selected_real_kind @@ -522,8 +1477,8 @@ - - + + selected_real_kind @@ -534,8 +1489,8 @@ - - + + selected_real_kind @@ -546,8 +1501,8 @@ - - + + selected_real_kind @@ -558,8 +1513,8 @@ - - + + selected_real_kind @@ -570,8 +1525,8 @@ - - + + selected_real_kind @@ -582,8 +1537,8 @@ - - + + selected_real_kind @@ -594,8 +1549,8 @@ - - + + selected_real_kind @@ -606,8 +1561,8 @@ - - + + selected_real_kind @@ -618,8 +1573,8 @@ - - + + selected_real_kind @@ -630,8 +1585,8 @@ - - + + selected_real_kind @@ -642,8 +1597,8 @@ - - + + selected_real_kind @@ -654,8 +1609,8 @@ - - + + selected_real_kind @@ -666,8 +1621,8 @@ - - + + selected_real_kind @@ -678,8 +1633,8 @@ - - + + selected_real_kind @@ -690,8 +1645,8 @@ - - + + selected_real_kind @@ -702,252 +1657,277 @@ - - + + + + + + + + + + + + + - 8 + 2 - - + - 8 + 2 - + - 7 + 4 - + - 6 + 4 - + 5 - + - 4 + 5 + + + + + 5 - + 3 - + 2 - - - - selected_real_kind - - 13 - 300 - - - - - + + + + + + + + + + - + jprb - + + load_scalar + + selected_real_kind - + rpi - + rclum - + rhpla - + rkbol - + rnavo - + rday - + rdayi - + rhour - + rea - + repsm - + rsiyea - + rsiday - + romega - + ra - + rg - + r1sa - + rsigma - + ri0 - + r - + rmd - + rmv - + rmo3 - + rd - + rv - + rcpd - + rcpv - + rcvd - + rcvv - + rkappa - + retv - + rmco2 - + rmch4 - + rmn2o - + rmco - + rmhcho - + rmno2 - + rmso2 - + rmso4 - + rcw - + rcs - + ratm - + rtt - + rlvtt - + rlstt - + rlvzer - + rlszer - + rlmlt - + rdt - + restt - + rgamw - + rbetw - + ralpw - + rgams - + rbets - + ralps - + ralpd - + rbetd - + rgamd - - csnan - - + rsnan + + tomcst + + + yrcst + + + yomcst_load_parameters + + + yrcst_copy_parameters + - jprb + jprb selected_real_kind @@ -958,73 +1938,6 @@ - - csnan - - - - - - - - - - char - - 0 - - - - char - - 0 - - - - - char - - 0 - - - - - char - - 0 - - - - - char - - 0 - - - - - char - - 0 - - - - - char - - 244 - - - - - char - - 127 - - - - - From 270417b30cd50b1589f58c6617fbf3a43c1181f3 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 13 Feb 2024 09:54:49 +0000 Subject: [PATCH 155/174] Update yoecldp.xmod --- src/cloudsc_loki/xmod/yoecldp.xmod | 2005 ++++++++++++++++++++++++---- 1 file changed, 1738 insertions(+), 267 deletions(-) diff --git a/src/cloudsc_loki/xmod/yoecldp.xmod b/src/cloudsc_loki/xmod/yoecldp.xmod index 9946c976..16083b51 100644 --- a/src/cloudsc_loki/xmod/yoecldp.xmod +++ b/src/cloudsc_loki/xmod/yoecldp.xmod @@ -2,24 +2,1491 @@ yoecldp parkind1 - file_io_mod - - - - - - name - variable - - - - - + + + + + 4 + + + + + + 4 + + + + + + 4 + + + + + + 4 + + + + + + 4 + + + + + + 4 + + + + + + + ramid + + + rcldiff + + + rcldiff_convi + + + rclcrit + + + rclcrit_sea + + + rclcrit_land + + + rkconv + + + rprc1 + + + rprc2 + + + rcldmax + + + rpecons + + + rvrfactor + + + rprecrhmax + + + rtaumel + + + ramin + + + rlmin + + + rkooptau + + + rcldtopp + + + rlcritsnow + + + rsnowlin1 + + + rsnowlin2 + + + ricehi1 + + + ricehi2 + + + riceinit + + + rvice + + + rvrain + + + rvsnow + + + rthomo + + + rcovpmin + + + rccn + + + rnice + + + rccnom + + + rccnss + + + rccnsu + + + rcldtopcf + + + rdepliqrefrate + + + rdepliqrefdepth + + + rcl_kkaac + + + rcl_kkbac + + + rcl_kkaau + + + rcl_kkbauq + + + rcl_kkbaun + + + rcl_kk_cloud_num_sea + + + rcl_kk_cloud_num_land + + + rcl_ai + + + rcl_bi + + + rcl_ci + + + rcl_di + + + rcl_x1i + + + rcl_x2i + + + rcl_x3i + + + rcl_x4i + + + rcl_const1i + + + rcl_const2i + + + rcl_const3i + + + rcl_const4i + + + rcl_const5i + + + rcl_const6i + + + rcl_apb1 + + + rcl_apb2 + + + rcl_apb3 + + + rcl_as + + + rcl_bs + + + rcl_cs + + + rcl_ds + + + rcl_x1s + + + rcl_x2s + + + rcl_x3s + + + rcl_x4s + + + rcl_const1s + + + rcl_const2s + + + rcl_const3s + + + rcl_const4s + + + rcl_const5s + + + rcl_const6s + + + rcl_const7s + + + rcl_const8s + + + rdenswat + + + rdensref + + + rcl_ar + + + rcl_br + + + rcl_cr + + + rcl_dr + + + rcl_x1r + + + rcl_x2r + + + rcl_x4r + + + rcl_ka273 + + + rcl_cdenom1 + + + rcl_cdenom2 + + + rcl_cdenom3 + + + rcl_schmidt + + + rcl_dynvisc + + + rcl_const1r + + + rcl_const2r + + + rcl_const3r + + + rcl_const4r + + + rcl_fac1 + + + rcl_fac2 + + + rcl_const5r + + + rcl_const6r + + + rcl_fzrab + + + rcl_fzrbb + + + lcldextra + + + lcldbudget + + + nssopt + + + ncldtop + + + naeclbc + + + naecldu + + + naeclom + + + naeclss + + + naeclsu + + + nclddiag + + + naercld + + + laerliqautolsp + + + laerliqautocp + + + laerliqautocpb + + + laerliqcoll + + + laericesed + + + laericeauto + + + nshapep + + + nshapeq + + + nbeta + + + rbeta + + + rbetap1 + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + + + + + + + selected_real_kind + + 13 + 300 + + + - - + + selected_real_kind @@ -30,62 +1497,68 @@ - - - - name - variable - - - - - + + + + + selected_real_kind + + 13 + 300 + + + - - - - - name - variable - - - - - + + + + + selected_real_kind + + 13 + 300 + + + - - + + - 4 + + selected_real_kind + + 13 + 300 + + - - - - - name - start - end - size - nlon - buffer - - - - - + + + + + selected_real_kind + + 13 + 300 + + + - - + + - 4 + + selected_real_kind + + 13 + 300 + + - - - - - + + selected_real_kind @@ -96,45 +1569,44 @@ - - - - - - 1 - - - size - - + + + + + selected_real_kind + + 13 + 300 + + + - - - name - start - end - size - nlon - nlev - buffer - - - - - + + + + + selected_real_kind + + 13 + 300 + + + - - + + - 4 + + selected_real_kind + + 13 + 300 + + - - - - - - + + selected_real_kind @@ -145,65 +1617,66 @@ - - - - - - 1 - - - size - - + + + + + selected_real_kind + + 13 + 300 + + + - - - - 1 - - - size - - - - - 1 - - - nlev - - + + + + + selected_real_kind + + 13 + 300 + + + - - - name - start - end - size - nlon - nlev - ndim - buffer - - - - - + + + + 4 + + + + + + 4 + + + + + + 4 + + + + + + + + + + 4 + - - + + 4 - - - - - - - + + selected_real_kind @@ -214,157 +1687,119 @@ - - - - - - 1 - - - size - - - - - - - 1 - - - size - - - - - 1 - - - nlev - - - - - - - 1 - - - size - - - - - 1 - - - nlev - - - - - 1 - - - ndim - - - - - - name - start - end - size - nlon - buffer - - - - - + + + + + selected_real_kind + + 13 + 300 + + + - - + + 4 - - - - - - - + + + + + selected_real_kind + + 13 + 300 + + + + + + - 1 + 0 - size + 100 - - - name - start - end - size - nlon - buffer - - - - - - - - - - 4 - - - - - - - + - 4 + + selected_real_kind + + 13 + 300 + + - - - + + - 1 + 0 - size + 100 + + + + + + + + - + jpim - + jprb - - load_scalar + + nclv + + + ncldql + + + ncldqi + + + ncldqr - - load_array + + ncldqs + + + ncldqv + + + tecldp + + + selected_real_kind + + + yrecldp + + + yrecldp_load_parameters - jpim + jpim selected_int_kind @@ -375,7 +1810,7 @@ - jprb + jprb selected_real_kind @@ -386,6 +1821,42 @@ + + nclv + + 5 + + + + ncldql + + 1 + + + + ncldqi + + 2 + + + + ncldqr + + 3 + + + + ncldqs + + 4 + + + + ncldqv + + 5 + + From fabd8450e8c29fd8f2f04546b726e8d59ed1c0a1 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Tue, 13 Feb 2024 10:43:53 +0000 Subject: [PATCH 156/174] Build CUDA variants with HDF5 (in addition to Serialbox) --- .github/scripts/verify-targets.sh | 5 +- src/cloudsc_cuda/CMakeLists.txt | 45 ++- src/cloudsc_cuda/cloudsc/load_state.cu | 391 +++++++++++++++++++++++-- 3 files changed, 392 insertions(+), 49 deletions(-) diff --git a/.github/scripts/verify-targets.sh b/.github/scripts/verify-targets.sh index c2c67147..240d0ace 100755 --- a/.github/scripts/verify-targets.sh +++ b/.github/scripts/verify-targets.sh @@ -21,10 +21,7 @@ then if [[ "$build_flags" == *"--with-cuda"* ]] then targets+=(dwarf-cloudsc-gpu-scc-cuf dwarf-cloudsc-gpu-scc-cuf-k-caching) - if [[ "$io_library_flag" == "--with-serialbox" ]] - then - targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching) - fi + targets+=(dwarf-cloudsc-c-cuda dwarf-cloudsc-c-cuda-hoist dwarf-cloudsc-c-cuda-k-caching) fi fi diff --git a/src/cloudsc_cuda/CMakeLists.txt b/src/cloudsc_cuda/CMakeLists.txt index 5393d3e6..04d5da1b 100644 --- a/src/cloudsc_cuda/CMakeLists.txt +++ b/src/cloudsc_cuda/CMakeLists.txt @@ -9,7 +9,7 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_C_CUDA DESCRIPTION "Build the CUDA version of CLOUDSC C using Serialbox" DEFAULT ON - CONDITION Serialbox_FOUND AND HAVE_CUDA + CONDITION (Serialbox_FOUND OR HDF5_FOUND) AND HAVE_CUDA ) if( HAVE_CLOUDSC_C_CUDA ) @@ -36,9 +36,12 @@ if( HAVE_CLOUDSC_C_CUDA ) PUBLIC_INCLUDES $ $ - PUBLIC_LIBS - Serialbox::Serialbox_C - $<${HAVE_OMP}:OpenMP::OpenMP_C> + PUBLIC_LIBS + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> + $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) target_include_directories( @@ -91,9 +94,12 @@ if( HAVE_CLOUDSC_C_CUDA ) PUBLIC_INCLUDES $ $ - PUBLIC_LIBS - Serialbox::Serialbox_C - $<${HAVE_OMP}:OpenMP::OpenMP_C> + PUBLIC_LIBS + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> + $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) target_include_directories( @@ -146,9 +152,12 @@ if( HAVE_CLOUDSC_C_CUDA ) PUBLIC_INCLUDES $ $ - PUBLIC_LIBS - Serialbox::Serialbox_C - $<${HAVE_OMP}:OpenMP::OpenMP_C> + PUBLIC_LIBS + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> + $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) target_include_directories( @@ -181,9 +190,19 @@ if( HAVE_CLOUDSC_C_CUDA ) ) ### - # Create symlink for the input data - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink - ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + + # Create symlink for the input data + if( HAVE_SERIALBOX ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + endif() + + if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + endif() else() ecbuild_info( "Serialbox and/or CUDA not found, disabling CUDA prototype(s)" ) diff --git a/src/cloudsc_cuda/cloudsc/load_state.cu b/src/cloudsc_cuda/cloudsc/load_state.cu index 01e61898..5826c4dc 100644 --- a/src/cloudsc_cuda/cloudsc/load_state.cu +++ b/src/cloudsc_cuda/cloudsc/load_state.cu @@ -11,15 +11,40 @@ #include "load_state.h" #include +#ifdef HAVE_SERIALBOX #include "serialbox-c/Serialbox.h" +#endif +#ifdef HAVE_HDF5 +#include "hdf5.h" +#define INPUT_FILE "input.h5" +#define REFERENCE_FILE "reference.h5" +#endif #define min(a, b) (((a) < (b)) ? (a) : (b)) #define max(a, b) (((a) > (b)) ? (a) : (b)) +#ifdef HAVE_HDF5 +void read_hdf5_int(hid_t file_id, const char *name, int *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} + +void read_hdf5(hid_t file_id, const char *name, double *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} +#endif /* Query sizes and dimensions of state arrays */ void query_state(int *klon, int *klev) { +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* globalMetainfo = serialboxSerializerGetGlobalMetainfo(serializer); @@ -28,6 +53,17 @@ void query_state(int *klon, int *klev) serialboxMetainfoDestroy(globalMetainfo); serialboxSerializerDestroy(serializer); +#endif +#ifdef HAVE_HDF5 + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + read_hdf5_int(file_id, "/KLEV", klev); + read_hdf5_int(file_id, "/KLON", klon); + + status = H5Fclose(file_id); +#endif } void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) @@ -46,13 +82,14 @@ void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngpto // Zero out the remainder of last block /* - int bsize = min(nproma, ngptot - (nblocks-1)*nproma); // Size of the field block + bsize = min(nproma, ngptot - (nblocks-1)*nproma); // Size of the field block printf("zeroing last block : %d \n",bsize); for (i=bsize; iramid); + read_hdf5(file_id, "/YRECLDP_RCLDIFF", &yrecldp->rcldiff); + read_hdf5(file_id, "/YRECLDP_RCLDIFF_CONVI", &yrecldp->rcldiff_convi); + read_hdf5(file_id, "/YRECLDP_RCLCRIT", &yrecldp->rclcrit); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_SEA", &yrecldp->rclcrit_sea); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_LAND", &yrecldp->rclcrit_land); + read_hdf5(file_id, "/YRECLDP_RKCONV", &yrecldp->rkconv); + read_hdf5(file_id, "/YRECLDP_RPRC1", &yrecldp->rprc1); + read_hdf5(file_id, "/YRECLDP_RPRC2", &yrecldp->rprc2); + read_hdf5(file_id, "/YRECLDP_RCLDMAX", &yrecldp->rcldmax); + read_hdf5(file_id, "/YRECLDP_RPECONS", &yrecldp->rpecons); + read_hdf5(file_id, "/YRECLDP_RVRFACTOR", &yrecldp->rvrfactor); + read_hdf5(file_id, "/YRECLDP_RPRECRHMAX", &yrecldp->rprecrhmax); + read_hdf5(file_id, "/YRECLDP_RTAUMEL", &yrecldp->rtaumel); + read_hdf5(file_id, "/YRECLDP_RAMIN", &yrecldp->ramin); + read_hdf5(file_id, "/YRECLDP_RLMIN", &yrecldp->rlmin); + read_hdf5(file_id, "/YRECLDP_RKOOPTAU", &yrecldp->rkooptau); + read_hdf5(file_id, "/YRECLDP_RCLDTOPP", &yrecldp->rcldtopp); + read_hdf5(file_id, "/YRECLDP_RLCRITSNOW", &yrecldp->rlcritsnow); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN1", &yrecldp->rsnowlin1); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN2", &yrecldp->rsnowlin2); + read_hdf5(file_id, "/YRECLDP_RICEHI1", &yrecldp->ricehi1); + read_hdf5(file_id, "/YRECLDP_RICEHI2", &yrecldp->ricehi2); + read_hdf5(file_id, "/YRECLDP_RICEINIT", &yrecldp->riceinit); + read_hdf5(file_id, "/YRECLDP_RVICE", &yrecldp->rvice); + read_hdf5(file_id, "/YRECLDP_RVRAIN", &yrecldp->rvrain); + read_hdf5(file_id, "/YRECLDP_RVSNOW", &yrecldp->rvsnow); + read_hdf5(file_id, "/YRECLDP_RTHOMO", &yrecldp->rthomo); + read_hdf5(file_id, "/YRECLDP_RCOVPMIN", &yrecldp->rcovpmin); + read_hdf5(file_id, "/YRECLDP_RCCN", &yrecldp->rccn); + read_hdf5(file_id, "/YRECLDP_RNICE", &yrecldp->rnice); + read_hdf5(file_id, "/YRECLDP_RCCNOM", &yrecldp->rccnom); + read_hdf5(file_id, "/YRECLDP_RCCNSS", &yrecldp->rccnss); + read_hdf5(file_id, "/YRECLDP_RCCNSU", &yrecldp->rccnsu); + read_hdf5(file_id, "/YRECLDP_RCLDTOPCF", &yrecldp->rcldtopcf); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFRATE", &yrecldp->rdepliqrefrate); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFDEPTH", &yrecldp->rdepliqrefdepth); + read_hdf5(file_id, "/YRECLDP_RCL_KKAac", &yrecldp->rcl_kkaac); + read_hdf5(file_id, "/YRECLDP_RCL_KKBac", &yrecldp->rcl_kkbac); + read_hdf5(file_id, "/YRECLDP_RCL_KKAau", &yrecldp->rcl_kkaau); + read_hdf5(file_id, "/YRECLDP_RCL_KKBauq", &yrecldp->rcl_kkbauq); + read_hdf5(file_id, "/YRECLDP_RCL_KKBaun", &yrecldp->rcl_kkbaun); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_sea", &yrecldp->rcl_kk_cloud_num_sea); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_land", &yrecldp->rcl_kk_cloud_num_land); + read_hdf5(file_id, "/YRECLDP_RCL_AI", &yrecldp->rcl_ai); + read_hdf5(file_id, "/YRECLDP_RCL_BI", &yrecldp->rcl_bi); + read_hdf5(file_id, "/YRECLDP_RCL_CI", &yrecldp->rcl_ci); + read_hdf5(file_id, "/YRECLDP_RCL_DI", &yrecldp->rcl_di); + read_hdf5(file_id, "/YRECLDP_RCL_X1I", &yrecldp->rcl_x1i); + read_hdf5(file_id, "/YRECLDP_RCL_X2I", &yrecldp->rcl_x2i); + read_hdf5(file_id, "/YRECLDP_RCL_X3I", &yrecldp->rcl_x3i); + read_hdf5(file_id, "/YRECLDP_RCL_X4I", &yrecldp->rcl_x4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1I", &yrecldp->rcl_const1i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2I", &yrecldp->rcl_const2i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3I", &yrecldp->rcl_const3i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4I", &yrecldp->rcl_const4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5I", &yrecldp->rcl_const5i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6I", &yrecldp->rcl_const6i); + read_hdf5(file_id, "/YRECLDP_RCL_APB1", &yrecldp->rcl_apb1); + read_hdf5(file_id, "/YRECLDP_RCL_APB2", &yrecldp->rcl_apb2); + read_hdf5(file_id, "/YRECLDP_RCL_APB3", &yrecldp->rcl_apb3); + read_hdf5(file_id, "/YRECLDP_RCL_AS", &yrecldp->rcl_as); + read_hdf5(file_id, "/YRECLDP_RCL_BS", &yrecldp->rcl_bs); + read_hdf5(file_id, "/YRECLDP_RCL_CS", &yrecldp->rcl_cs); + read_hdf5(file_id, "/YRECLDP_RCL_DS", &yrecldp->rcl_ds); + read_hdf5(file_id, "/YRECLDP_RCL_X1S", &yrecldp->rcl_x1s); + read_hdf5(file_id, "/YRECLDP_RCL_X2S", &yrecldp->rcl_x2s); + read_hdf5(file_id, "/YRECLDP_RCL_X3S", &yrecldp->rcl_x3s); + read_hdf5(file_id, "/YRECLDP_RCL_X4S", &yrecldp->rcl_x4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1S", &yrecldp->rcl_const1s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2S", &yrecldp->rcl_const2s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3S", &yrecldp->rcl_const3s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4S", &yrecldp->rcl_const4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5S", &yrecldp->rcl_const5s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6S", &yrecldp->rcl_const6s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST7S", &yrecldp->rcl_const7s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST8S", &yrecldp->rcl_const8s); + read_hdf5(file_id, "/YRECLDP_RDENSWAT", &yrecldp->rdenswat); + read_hdf5(file_id, "/YRECLDP_RDENSREF", &yrecldp->rdensref); + read_hdf5(file_id, "/YRECLDP_RCL_AR", &yrecldp->rcl_ar); + read_hdf5(file_id, "/YRECLDP_RCL_BR", &yrecldp->rcl_br); + read_hdf5(file_id, "/YRECLDP_RCL_CR", &yrecldp->rcl_cr); + read_hdf5(file_id, "/YRECLDP_RCL_DR", &yrecldp->rcl_dr); + read_hdf5(file_id, "/YRECLDP_RCL_X1R", &yrecldp->rcl_x1r); + read_hdf5(file_id, "/YRECLDP_RCL_X2R", &yrecldp->rcl_x2r); + read_hdf5(file_id, "/YRECLDP_RCL_X4R", &yrecldp->rcl_x4r); + read_hdf5(file_id, "/YRECLDP_RCL_KA273", &yrecldp->rcl_ka273); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM1", &yrecldp->rcl_cdenom1); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM2", &yrecldp->rcl_cdenom2); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM3", &yrecldp->rcl_cdenom3); + read_hdf5(file_id, "/YRECLDP_RCL_SCHMIDT", &yrecldp->rcl_schmidt); + read_hdf5(file_id, "/YRECLDP_RCL_DYNVISC", &yrecldp->rcl_dynvisc); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1R", &yrecldp->rcl_const1r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2R", &yrecldp->rcl_const2r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3R", &yrecldp->rcl_const3r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4R", &yrecldp->rcl_const4r); + read_hdf5(file_id, "/YRECLDP_RCL_FAC1", &yrecldp->rcl_fac1); + read_hdf5(file_id, "/YRECLDP_RCL_FAC2", &yrecldp->rcl_fac2); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5R", &yrecldp->rcl_const5r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6R", &yrecldp->rcl_const6r); + read_hdf5(file_id, "/YRECLDP_RCL_FZRAB", &yrecldp->rcl_fzrab); + read_hdf5(file_id, "/YRECLDP_RCL_FZRBB", &yrecldp->rcl_fzrbb); + read_hdf5_int(file_id, "/YRECLDP_LCLDEXTRA", &yrecldp->lcldextra); // Bool + read_hdf5_int(file_id, "/YRECLDP_LCLDBUDGET", &yrecldp->lcldbudget); // Bool + read_hdf5_int(file_id, "/YRECLDP_NSSOPT", &yrecldp->nssopt); + read_hdf5_int(file_id, "/YRECLDP_NCLDTOP", &yrecldp->ncldtop); + read_hdf5_int(file_id, "/YRECLDP_NAECLBC", &yrecldp->naeclbc); + read_hdf5_int(file_id, "/YRECLDP_NAECLDU", &yrecldp->naecldu); + read_hdf5_int(file_id, "/YRECLDP_NAECLOM", &yrecldp->naeclom); + read_hdf5_int(file_id, "/YRECLDP_NAECLSS", &yrecldp->naeclss); + read_hdf5_int(file_id, "/YRECLDP_NAECLSU", &yrecldp->naeclsu); + read_hdf5_int(file_id, "/YRECLDP_NCLDDIAG", &yrecldp->nclddiag); + read_hdf5_int(file_id, "/YRECLDP_NAERCLD", &yrecldp->naercld); + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOLSP", &yrecldp->laerliqautolsp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCP", &yrecldp->laerliqautocp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCPB", &yrecldp->laerliqautocpb); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQCOLL", &yrecldp->laerliqcoll); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICESED", &yrecldp->laericesed); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICEAUTO", &yrecldp->laericeauto); // Bool + read_hdf5(file_id, "/YRECLDP_NSHAPEP", &yrecldp->nshapep); + read_hdf5(file_id, "/YRECLDP_NSHAPEQ", &yrecldp->nshapeq); + read_hdf5_int(file_id, "/YRECLDP_NBETA", &yrecldp->nbeta); + + status = H5Fclose(file_id); + +#endif + } @@ -417,13 +709,15 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "reference", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); load_and_expand_2d(serializer, savepoint, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); load_and_expand_1d(serializer, savepoint, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); @@ -448,4 +742,37 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(REFERENCE_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); + load_and_expand_1d(file_id, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); + load_and_expand_2d(file_id, "PFSQLF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqlf); + load_and_expand_2d(file_id, "PFSQIF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqif); + load_and_expand_2d(file_id, "PFCQLNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqlng); + load_and_expand_2d(file_id, "PFCQNNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqnng); + load_and_expand_2d(file_id, "PFSQRF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqrf); + load_and_expand_2d(file_id, "PFSQSF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqsf); + load_and_expand_2d(file_id, "PFCQRNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqrng); + load_and_expand_2d(file_id, "PFCQSNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqsng); + load_and_expand_2d(file_id, "PFSQLTUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqltur); + load_and_expand_2d(file_id, "PFSQITUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqitur); + load_and_expand_2d(file_id, "PFPLSL", nlon, nlev+1, nproma, ngptot, nblocks, pfplsl); + load_and_expand_2d(file_id, "PFPLSN", nlon, nlev+1, nproma, ngptot, nblocks, pfplsn); + load_and_expand_2d(file_id, "PFHPSL", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsl); + load_and_expand_2d(file_id, "PFHPSN", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsn); + load_and_expand_2d(file_id, "TENDENCY_LOC_T", nlon, nlev, nproma, ngptot, nblocks, tend_loc_t); + load_and_expand_2d(file_id, "TENDENCY_LOC_Q", nlon, nlev, nproma, ngptot, nblocks, tend_loc_q); + load_and_expand_2d(file_id, "TENDENCY_LOC_A", nlon, nlev, nproma, ngptot, nblocks, tend_loc_a); + load_and_expand_3d(file_id, "TENDENCY_LOC_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_loc_cld); + + status = H5Fclose(file_id); +#endif + } From f729c652301068900577a32a400b8475d1d869f1 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Tue, 13 Feb 2024 12:19:56 +0000 Subject: [PATCH 157/174] CUDA load and validate: use index arithmetic instead of pointer hack --- src/cloudsc_cuda/cloudsc/cloudsc_validate.cu | 27 +++--- src/cloudsc_cuda/cloudsc/load_state.cu | 93 +++++--------------- 2 files changed, 34 insertions(+), 86 deletions(-) diff --git a/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu b/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu index c84dd3c7..8998115e 100644 --- a/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu +++ b/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu @@ -64,14 +64,14 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jk = 0; jk < bsize; jk++) { - zminval = fmin(zminval, field[b][jk]); - zmaxval = fmax(zmaxval, field[b][jk]); + zminval = fmin(zminval, v_field[b*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nlon+jk]); // Difference against reference result in one-norm sense - zdiff = fabs(field[b][jk] - reference[b][jk]); + zdiff = fabs(v_field[b*nlon+jk] - v_ref[b*nlon+jk]); zmaxerr = fmax(zmaxerr, zdiff); zerrsum = zerrsum + zdiff; - zsum = zsum + abs(reference[b][jk]); + zsum = zsum + abs(v_ref[b*nlon+jk]); } } zavgpgp = zerrsum / (double) ngptot; @@ -99,13 +99,14 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int bsize = min(nlon, ngptot - b*nlon); // field block size for (jl = 0; jl < nlev; jl++) { for (jk = 0; jk < bsize; jk++) { - zminval = fmin(zminval, field[b][jl][jk]); - zmaxval = fmax(zmaxval, field[b][jl][jk]); + zminval = fmin(zminval, v_field[b*nlev*nlon+jl*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nlev*nlon+jl*nlon+jk]); + // Difference against reference result in one-norm sense - zdiff = fabs(field[b][jl][jk] - reference[b][jl][jk]); + zdiff = fabs(v_field[b*nlev*nlon+jl*nlon+jk] - v_ref[b*nlev*nlon+jl*nlon+jk]); zmaxerr = fmax(zmaxerr, zdiff); zerrsum = zerrsum + zdiff; - zsum = zsum + abs(reference[b][jl][jk]); + zsum = zsum + abs(v_ref[b*nlev*nlon+jl*nlon+jk]); } } } @@ -136,18 +137,18 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, for (jm = 0; jm < nclv; jm++) { for (jl = 0; jl < nlev; jl++) { for (jk = 0; jk < bsize; jk++) { - zminval = fmin(zminval, field[b][jm][jl][jk]); - zmaxval = fmax(zmaxval, field[b][jm][jl][jk]); + zminval = fmin(zminval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); + zmaxval = fmax(zmaxval, v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); // Difference against reference result in one-norm sense - zdiff = fabs(field[b][jm][jl][jk] - reference[b][jm][jl][jk]); + zdiff = fabs(v_field[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk] - v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); zmaxerr = fmax(zmaxerr, zdiff); zerrsum = zerrsum + zdiff; - zsum = zsum + abs(reference[b][jm][jl][jk]); + zsum = zsum + abs(v_ref[b*nclv*nlev*nlon+jm*nlev*nlon+jl*nlon+jk]); } } } - } + } zavgpgp = zerrsum / (double) ngptot; print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); } diff --git a/src/cloudsc_cuda/cloudsc/load_state.cu b/src/cloudsc_cuda/cloudsc/load_state.cu index 5826c4dc..767fe257 100644 --- a/src/cloudsc_cuda/cloudsc/load_state.cu +++ b/src/cloudsc_cuda/cloudsc/load_state.cu @@ -68,119 +68,66 @@ void query_state(int *klon, int *klev) void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) { - int b, l, i, buf_start_idx, buf_idx; - double (*field)[nproma] = (double (*)[nproma]) field_in; + int b, i, buf_start_idx, buf_idx; -#pragma omp parallel for default(shared) private(b, l, i, buf_start_idx, buf_idx) +#pragma omp parallel for default(shared) private(b, i, buf_start_idx, buf_idx) for (b = 0; b < nblocks; b++) { buf_start_idx = ((b)*nproma) % nlon; for (i = 0; i < nproma; i++) { buf_idx = (buf_start_idx + i) % nlon; - field[b][i] = buffer[buf_idx]; - } - } - - // Zero out the remainder of last block - /* - bsize = min(nproma, ngptot - (nblocks-1)*nproma); // Size of the field block - printf("zeroing last block : %d \n",bsize); - for (i=bsize; i Date: Tue, 13 Feb 2024 12:21:59 +0000 Subject: [PATCH 158/174] Build SYCL variants with HDF5 (in addition to Serialbox) --- src/cloudsc_sycl/CMakeLists.txt | 34 +- src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp | 19 +- src/cloudsc_sycl/cloudsc/load_state.cpp | 371 ++++++++++++++++-- 3 files changed, 381 insertions(+), 43 deletions(-) diff --git a/src/cloudsc_sycl/CMakeLists.txt b/src/cloudsc_sycl/CMakeLists.txt index 1399447b..eac11d27 100644 --- a/src/cloudsc_sycl/CMakeLists.txt +++ b/src/cloudsc_sycl/CMakeLists.txt @@ -9,7 +9,7 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_SYCL DESCRIPTION "Build the SYCL version CLOUDSC using Serialbox" DEFAULT ON - CONDITION Serialbox_FOUND AND HAVE_SYCL + CONDITION (Serialbox_FOUND OR HDF5_FOUND) AND HAVE_SYCL ) if( HAVE_CLOUDSC_SYCL ) @@ -32,8 +32,11 @@ if( HAVE_CLOUDSC_SYCL ) $ $ PUBLIC_LIBS - Serialbox::Serialbox_C - $<${HAVE_OMP}:OpenMP::OpenMP_C> + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> + $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) add_sycl_to_target( @@ -77,8 +80,11 @@ if( HAVE_CLOUDSC_SYCL ) $ $ PUBLIC_LIBS - Serialbox::Serialbox_C + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) add_sycl_to_target( @@ -122,8 +128,11 @@ if( HAVE_CLOUDSC_SYCL ) $ $ PUBLIC_LIBS - Serialbox::Serialbox_C + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) add_sycl_to_target( @@ -147,9 +156,18 @@ if( HAVE_CLOUDSC_SYCL ) OMP 1 ) - # Create symlink for the input data - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink - ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + # Create symlink for the input data + if( HAVE_SERIALBOX ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + endif() + + if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + endif() else() ecbuild_info( "Serialbox not found, disabling SYCL version" ) diff --git a/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp b/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp index 710614f9..7fc35bfe 100644 --- a/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp +++ b/src/cloudsc_sycl/cloudsc/cloudsc_validate.cpp @@ -51,8 +51,6 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jk; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; - //double (*field)[nlon] = (double (*)[nlon]) v_field; - //double (*reference)[nlon] = (double (*)[nlon]) v_ref; zminval = +std::numeric_limits::max(); zmaxval = -std::numeric_limits::max(); @@ -60,8 +58,8 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i zerrsum = 0.0; zsum = 0.0; -// #pragma omp parallel for default(shared) private(b, bsize, jk) \ -// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) +#pragma omp parallel for default(shared) private(b, bsize, jk) \ + reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jk = 0; jk < bsize; jk++) { @@ -85,8 +83,6 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jl, jk; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; -// double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field; -// double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref; zminval = +std::numeric_limits::max(); zmaxval = -std::numeric_limits::max(); @@ -94,9 +90,8 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int zerrsum = 0.0; zsum = 0.0; -// #pragma omp parallel for default(shared) private(b, bsize, jl, jk) \ -// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) - +#pragma omp parallel for default(shared) private(b, bsize, jl, jk) \ + reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jl = 0; jl < nlev; jl++) { @@ -124,8 +119,6 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jl, jk, jm; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; -// double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field; -// double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref; zminval = +std::numeric_limits::max(); zmaxval = -std::numeric_limits::max(); @@ -133,8 +126,8 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, zerrsum = 0.0; zsum = 0.0; -// #pragma omp parallel for default(shared) private(b, bsize, jl, jk, jm) \ -// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) +#pragma omp parallel for default(shared) private(b, bsize, jl, jk, jm) \ + reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jm = 0; jm < nclv; jm++) { diff --git a/src/cloudsc_sycl/cloudsc/load_state.cpp b/src/cloudsc_sycl/cloudsc/load_state.cpp index 034270ca..767fe257 100644 --- a/src/cloudsc_sycl/cloudsc/load_state.cpp +++ b/src/cloudsc_sycl/cloudsc/load_state.cpp @@ -9,17 +9,42 @@ */ #include "load_state.h" -#include #include +#ifdef HAVE_SERIALBOX #include "serialbox-c/Serialbox.h" +#endif +#ifdef HAVE_HDF5 +#include "hdf5.h" +#define INPUT_FILE "input.h5" +#define REFERENCE_FILE "reference.h5" +#endif #define min(a, b) (((a) < (b)) ? (a) : (b)) #define max(a, b) (((a) > (b)) ? (a) : (b)) +#ifdef HAVE_HDF5 +void read_hdf5_int(hid_t file_id, const char *name, int *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} + +void read_hdf5(hid_t file_id, const char *name, double *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} +#endif + /* Query sizes and dimensions of state arrays */ void query_state(int *klon, int *klev) { +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* globalMetainfo = serialboxSerializerGetGlobalMetainfo(serializer); @@ -28,6 +53,17 @@ void query_state(int *klon, int *klev) serialboxMetainfoDestroy(globalMetainfo); serialboxSerializerDestroy(serializer); +#endif +#ifdef HAVE_HDF5 + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + read_hdf5_int(file_id, "/KLEV", klev); + read_hdf5_int(file_id, "/KLON", klon); + + status = H5Fclose(file_id); +#endif } void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) @@ -94,7 +130,7 @@ void expand_3d(double *buffer_in, double *field_in, int nlon, int nlev, int nclv } } - +#ifdef HAVE_SERIALBOX void load_and_expand_1d(serialboxSerializer_t *serializer, serialboxSavepoint_t* savepoint, const char *name, int nlon, int nproma, int ngptot, int nblocks, double *field) { @@ -134,32 +170,83 @@ void load_and_expand_3d(serialboxSerializer_t *serializer, serialboxSavepoint_t* serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 3); expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); } +#endif +#if HAVE_HDF5 +void load_and_expand_1d(hid_t file_id, const char *name, int nlon, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nlon]; + int strides[1] = {1}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); +} -/* Read input state into memory */ -void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, - double* ptsphy, double* plcrit_aer, double* picrit_aer, - double* pre_ice, double* pccn, double* pnice, double* pt, double* pq, - double* tend_cml_t, double* tend_cml_q, double* tend_cml_a, double* tend_cml_cld, - double* tend_tmp_t, double* tend_tmp_q, double* tend_tmp_a, double* tend_tmp_cld, - double* pvfa, double* pvfl, double* pvfi, double* pdyna, double* pdynl, double* pdyni, - double* phrsw, double* phrlw, double* pvervel, double* pap, double* paph, double* plsm, - int* ktype, double* plu, double* plude, double* psnde, double* pmfu, - double* pmfd, double* pa, double* pclv, double* psupsat, struct TECLDP* yrecldp, - double* rg, double* rd, double* rcpd, double* retv, double* rlvtt, double* rlstt, - double* rlmlt, double* rtt, double* rv, double* r2es, double* r3les, double* r3ies, - double* r4les, double* r4ies, double* r5les, double* r5ies, double* r5alvcp, double* r5alscp, - double* ralvdcp, double* ralsdcp, double* ralfdcp, double* rtwat, - double* rtice, double* rticecu, double* rtwat_rtice_r, double *rtwat_rticecu_r, - double* rkoop1, double* rkoop2 ) +void load_and_expand_1d_int(hid_t file_id, const char *name, int nlon, int nproma, int ngptot, int nblocks, int *field) { + int buffer[nlon]; + int strides[1] = {1}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_1d_int((int *)buffer, field, nlon, nproma, ngptot, nblocks); +} + +void load_and_expand_2d(hid_t file_id, const char *name, int nlon, int nlev, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nlev][nlon]; + int strides[2] = {1, nlon}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_2d((double *)buffer, field, nlon, nlev, nproma, ngptot, nblocks); +} + +void load_and_expand_3d(hid_t file_id, const char *name, int nlon, int nlev, int nclv, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nclv][nlev][nlon]; + int strides[3] = {1, nlon, nlev*nlon}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); +} +#endif + +/* Read input state into memory */ +void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double* ptsphy, double* plcrit_aer, double* picrit_aer, + double* pre_ice, double* pccn, double* pnice, double* pt, double* pq, + double* tend_cml_t, double* tend_cml_q, double* tend_cml_a, double* tend_cml_cld, + double* tend_tmp_t, double* tend_tmp_q, double* tend_tmp_a, double* tend_tmp_cld, + double* pvfa, double* pvfl, double* pvfi, double* pdyna, double* pdynl, double* pdyni, + double* phrsw, double* phrlw, double* pvervel, double* pap, double* paph, double* plsm, + int* ktype, double* plu, double* plude, double* psnde, double* pmfu, + double* pmfd, double* pa, double* pclv, double* psupsat, struct TECLDP* yrecldp, + double* rg, double* rd, double* rcpd, double* retv, double* rlvtt, double* rlstt, + double* rlmlt, double* rtt, double* rv, double* r2es, double* r3les, double* r3ies, + double* r4les, double* r4ies, double* r5les, double* r5ies, double* r5alvcp, double* r5alscp, + double* ralvdcp, double* ralsdcp, double* ralfdcp, double* rtwat, + double* rtice, double* rticecu, double* rtwat_rtice_r, double *rtwat_rticecu_r, + double* rkoop1, double* rkoop2) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLCRIT_AER", nlon, nlev, nproma, ngptot, nblocks, plcrit_aer); load_and_expand_2d(serializer, savepoint, "PICRIT_AER", nlon, nlev, nproma, ngptot, nblocks, picrit_aer); load_and_expand_2d(serializer, savepoint, "PRE_ICE", nlon, nlev, nproma, ngptot, nblocks, pre_ice); @@ -355,8 +442,213 @@ void load_state(const int nlon, const int nlev, const int nclv, const int ngptot serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLCRIT_AER", nlon, nlev, nproma, ngptot, nblocks, plcrit_aer); + load_and_expand_2d(file_id, "PICRIT_AER", nlon, nlev, nproma, ngptot, nblocks, picrit_aer); + load_and_expand_2d(file_id, "PRE_ICE", nlon, nlev, nproma, ngptot, nblocks, pre_ice); + load_and_expand_2d(file_id, "PCCN", nlon, nlev, nproma, ngptot, nblocks, pccn); + load_and_expand_2d(file_id, "PNICE", nlon, nlev, nproma, ngptot, nblocks, pnice); + load_and_expand_2d(file_id, "PT", nlon, nlev, nproma, ngptot, nblocks, pt); + load_and_expand_2d(file_id, "PQ", nlon, nlev, nproma, ngptot, nblocks, pq); + load_and_expand_2d(file_id, "TENDENCY_CML_T", nlon, nlev, nproma, ngptot, nblocks, tend_cml_t); + load_and_expand_2d(file_id, "TENDENCY_CML_Q", nlon, nlev, nproma, ngptot, nblocks, tend_cml_q); + load_and_expand_2d(file_id, "TENDENCY_CML_A", nlon, nlev, nproma, ngptot, nblocks, tend_cml_a); + load_and_expand_3d(file_id, "TENDENCY_CML_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_cml_cld); + load_and_expand_2d(file_id, "TENDENCY_TMP_T", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_t); + load_and_expand_2d(file_id, "TENDENCY_TMP_Q", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_q); + load_and_expand_2d(file_id, "TENDENCY_TMP_A", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_a); + load_and_expand_3d(file_id, "TENDENCY_TMP_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_tmp_cld); + load_and_expand_2d(file_id, "PVFA", nlon, nlev, nproma, ngptot, nblocks, pvfa); + load_and_expand_2d(file_id, "PVFL", nlon, nlev, nproma, ngptot, nblocks, pvfl); + load_and_expand_2d(file_id, "PVFI", nlon, nlev, nproma, ngptot, nblocks, pvfi); + load_and_expand_2d(file_id, "PDYNA", nlon, nlev, nproma, ngptot, nblocks, pdyna); + load_and_expand_2d(file_id, "PDYNL", nlon, nlev, nproma, ngptot, nblocks, pdynl); + load_and_expand_2d(file_id, "PDYNI", nlon, nlev, nproma, ngptot, nblocks, pdyni); + load_and_expand_2d(file_id, "PHRSW", nlon, nlev, nproma, ngptot, nblocks, phrsw); + load_and_expand_2d(file_id, "PHRLW", nlon, nlev, nproma, ngptot, nblocks, phrlw); + load_and_expand_2d(file_id, "PVERVEL", nlon, nlev, nproma, ngptot, nblocks, pvervel); + load_and_expand_2d(file_id, "PAP", nlon, nlev, nproma, ngptot, nblocks, pap); + load_and_expand_2d(file_id, "PAPH", nlon, nlev+1, nproma, ngptot, nblocks, paph); + load_and_expand_1d(file_id, "PLSM", nlon, nproma, ngptot, nblocks, plsm); + load_and_expand_1d_int(file_id, "KTYPE", nlon, nproma, ngptot, nblocks, ktype); + load_and_expand_2d(file_id, "PLU", nlon, nlev, nproma, ngptot, nblocks, plu); + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PSNDE", nlon, nlev, nproma, ngptot, nblocks, psnde); + load_and_expand_2d(file_id, "PMFU", nlon, nlev, nproma, ngptot, nblocks, pmfu); + load_and_expand_2d(file_id, "PMFD", nlon, nlev, nproma, ngptot, nblocks, pmfd); + load_and_expand_2d(file_id, "PA", nlon, nlev, nproma, ngptot, nblocks, pa); + load_and_expand_3d(file_id, "PCLV", nlon, nlev, nclv, nproma, ngptot, nblocks, pclv); + load_and_expand_2d(file_id, "PSUPSAT", nlon, nlev, nproma, ngptot, nblocks, psupsat); + + read_hdf5(file_id, "/PTSPHY", ptsphy); + + read_hdf5(file_id, "/RG", rg); + read_hdf5(file_id, "/RD", rd); + read_hdf5(file_id, "/RCPD", rcpd); + read_hdf5(file_id, "/RETV", retv); + read_hdf5(file_id, "/RLVTT", rlvtt); + read_hdf5(file_id, "/RLSTT", rlstt); + read_hdf5(file_id, "/RLMLT", rlmlt); + read_hdf5(file_id, "/RTT", rtt); + read_hdf5(file_id, "/RV", rv); + read_hdf5(file_id, "/R2ES", r2es); + read_hdf5(file_id, "/R3LES", r3les); + read_hdf5(file_id, "/R3IES", r3ies); + read_hdf5(file_id, "/R4LES", r4les); + read_hdf5(file_id, "/R4IES", r4ies); + read_hdf5(file_id, "/R5LES", r5les); + read_hdf5(file_id, "/R5IES", r5ies); + read_hdf5(file_id, "/R5ALVCP", r5alvcp); + read_hdf5(file_id, "/R5ALSCP", r5alscp); + read_hdf5(file_id, "/RALVDCP", ralvdcp); + read_hdf5(file_id, "/RALSDCP", ralsdcp); + read_hdf5(file_id, "/RALFDCP", ralfdcp); + read_hdf5(file_id, "/RTWAT", rtwat); + read_hdf5(file_id, "/RTICE", rtice); + read_hdf5(file_id, "/RTICECU", rticecu); + read_hdf5(file_id, "/RTWAT_RTICE_R", rtwat_rtice_r); + read_hdf5(file_id, "/RTWAT_RTICECU_R", rtwat_rticecu_r); + read_hdf5(file_id, "/RKOOP1", rkoop1); + read_hdf5(file_id, "/RKOOP2", rkoop2); + + read_hdf5(file_id, "/YRECLDP_RAMID", &yrecldp->ramid); + read_hdf5(file_id, "/YRECLDP_RCLDIFF", &yrecldp->rcldiff); + read_hdf5(file_id, "/YRECLDP_RCLDIFF_CONVI", &yrecldp->rcldiff_convi); + read_hdf5(file_id, "/YRECLDP_RCLCRIT", &yrecldp->rclcrit); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_SEA", &yrecldp->rclcrit_sea); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_LAND", &yrecldp->rclcrit_land); + read_hdf5(file_id, "/YRECLDP_RKCONV", &yrecldp->rkconv); + read_hdf5(file_id, "/YRECLDP_RPRC1", &yrecldp->rprc1); + read_hdf5(file_id, "/YRECLDP_RPRC2", &yrecldp->rprc2); + read_hdf5(file_id, "/YRECLDP_RCLDMAX", &yrecldp->rcldmax); + read_hdf5(file_id, "/YRECLDP_RPECONS", &yrecldp->rpecons); + read_hdf5(file_id, "/YRECLDP_RVRFACTOR", &yrecldp->rvrfactor); + read_hdf5(file_id, "/YRECLDP_RPRECRHMAX", &yrecldp->rprecrhmax); + read_hdf5(file_id, "/YRECLDP_RTAUMEL", &yrecldp->rtaumel); + read_hdf5(file_id, "/YRECLDP_RAMIN", &yrecldp->ramin); + read_hdf5(file_id, "/YRECLDP_RLMIN", &yrecldp->rlmin); + read_hdf5(file_id, "/YRECLDP_RKOOPTAU", &yrecldp->rkooptau); + read_hdf5(file_id, "/YRECLDP_RCLDTOPP", &yrecldp->rcldtopp); + read_hdf5(file_id, "/YRECLDP_RLCRITSNOW", &yrecldp->rlcritsnow); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN1", &yrecldp->rsnowlin1); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN2", &yrecldp->rsnowlin2); + read_hdf5(file_id, "/YRECLDP_RICEHI1", &yrecldp->ricehi1); + read_hdf5(file_id, "/YRECLDP_RICEHI2", &yrecldp->ricehi2); + read_hdf5(file_id, "/YRECLDP_RICEINIT", &yrecldp->riceinit); + read_hdf5(file_id, "/YRECLDP_RVICE", &yrecldp->rvice); + read_hdf5(file_id, "/YRECLDP_RVRAIN", &yrecldp->rvrain); + read_hdf5(file_id, "/YRECLDP_RVSNOW", &yrecldp->rvsnow); + read_hdf5(file_id, "/YRECLDP_RTHOMO", &yrecldp->rthomo); + read_hdf5(file_id, "/YRECLDP_RCOVPMIN", &yrecldp->rcovpmin); + read_hdf5(file_id, "/YRECLDP_RCCN", &yrecldp->rccn); + read_hdf5(file_id, "/YRECLDP_RNICE", &yrecldp->rnice); + read_hdf5(file_id, "/YRECLDP_RCCNOM", &yrecldp->rccnom); + read_hdf5(file_id, "/YRECLDP_RCCNSS", &yrecldp->rccnss); + read_hdf5(file_id, "/YRECLDP_RCCNSU", &yrecldp->rccnsu); + read_hdf5(file_id, "/YRECLDP_RCLDTOPCF", &yrecldp->rcldtopcf); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFRATE", &yrecldp->rdepliqrefrate); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFDEPTH", &yrecldp->rdepliqrefdepth); + read_hdf5(file_id, "/YRECLDP_RCL_KKAac", &yrecldp->rcl_kkaac); + read_hdf5(file_id, "/YRECLDP_RCL_KKBac", &yrecldp->rcl_kkbac); + read_hdf5(file_id, "/YRECLDP_RCL_KKAau", &yrecldp->rcl_kkaau); + read_hdf5(file_id, "/YRECLDP_RCL_KKBauq", &yrecldp->rcl_kkbauq); + read_hdf5(file_id, "/YRECLDP_RCL_KKBaun", &yrecldp->rcl_kkbaun); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_sea", &yrecldp->rcl_kk_cloud_num_sea); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_land", &yrecldp->rcl_kk_cloud_num_land); + read_hdf5(file_id, "/YRECLDP_RCL_AI", &yrecldp->rcl_ai); + read_hdf5(file_id, "/YRECLDP_RCL_BI", &yrecldp->rcl_bi); + read_hdf5(file_id, "/YRECLDP_RCL_CI", &yrecldp->rcl_ci); + read_hdf5(file_id, "/YRECLDP_RCL_DI", &yrecldp->rcl_di); + read_hdf5(file_id, "/YRECLDP_RCL_X1I", &yrecldp->rcl_x1i); + read_hdf5(file_id, "/YRECLDP_RCL_X2I", &yrecldp->rcl_x2i); + read_hdf5(file_id, "/YRECLDP_RCL_X3I", &yrecldp->rcl_x3i); + read_hdf5(file_id, "/YRECLDP_RCL_X4I", &yrecldp->rcl_x4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1I", &yrecldp->rcl_const1i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2I", &yrecldp->rcl_const2i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3I", &yrecldp->rcl_const3i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4I", &yrecldp->rcl_const4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5I", &yrecldp->rcl_const5i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6I", &yrecldp->rcl_const6i); + read_hdf5(file_id, "/YRECLDP_RCL_APB1", &yrecldp->rcl_apb1); + read_hdf5(file_id, "/YRECLDP_RCL_APB2", &yrecldp->rcl_apb2); + read_hdf5(file_id, "/YRECLDP_RCL_APB3", &yrecldp->rcl_apb3); + read_hdf5(file_id, "/YRECLDP_RCL_AS", &yrecldp->rcl_as); + read_hdf5(file_id, "/YRECLDP_RCL_BS", &yrecldp->rcl_bs); + read_hdf5(file_id, "/YRECLDP_RCL_CS", &yrecldp->rcl_cs); + read_hdf5(file_id, "/YRECLDP_RCL_DS", &yrecldp->rcl_ds); + read_hdf5(file_id, "/YRECLDP_RCL_X1S", &yrecldp->rcl_x1s); + read_hdf5(file_id, "/YRECLDP_RCL_X2S", &yrecldp->rcl_x2s); + read_hdf5(file_id, "/YRECLDP_RCL_X3S", &yrecldp->rcl_x3s); + read_hdf5(file_id, "/YRECLDP_RCL_X4S", &yrecldp->rcl_x4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1S", &yrecldp->rcl_const1s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2S", &yrecldp->rcl_const2s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3S", &yrecldp->rcl_const3s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4S", &yrecldp->rcl_const4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5S", &yrecldp->rcl_const5s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6S", &yrecldp->rcl_const6s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST7S", &yrecldp->rcl_const7s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST8S", &yrecldp->rcl_const8s); + read_hdf5(file_id, "/YRECLDP_RDENSWAT", &yrecldp->rdenswat); + read_hdf5(file_id, "/YRECLDP_RDENSREF", &yrecldp->rdensref); + read_hdf5(file_id, "/YRECLDP_RCL_AR", &yrecldp->rcl_ar); + read_hdf5(file_id, "/YRECLDP_RCL_BR", &yrecldp->rcl_br); + read_hdf5(file_id, "/YRECLDP_RCL_CR", &yrecldp->rcl_cr); + read_hdf5(file_id, "/YRECLDP_RCL_DR", &yrecldp->rcl_dr); + read_hdf5(file_id, "/YRECLDP_RCL_X1R", &yrecldp->rcl_x1r); + read_hdf5(file_id, "/YRECLDP_RCL_X2R", &yrecldp->rcl_x2r); + read_hdf5(file_id, "/YRECLDP_RCL_X4R", &yrecldp->rcl_x4r); + read_hdf5(file_id, "/YRECLDP_RCL_KA273", &yrecldp->rcl_ka273); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM1", &yrecldp->rcl_cdenom1); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM2", &yrecldp->rcl_cdenom2); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM3", &yrecldp->rcl_cdenom3); + read_hdf5(file_id, "/YRECLDP_RCL_SCHMIDT", &yrecldp->rcl_schmidt); + read_hdf5(file_id, "/YRECLDP_RCL_DYNVISC", &yrecldp->rcl_dynvisc); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1R", &yrecldp->rcl_const1r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2R", &yrecldp->rcl_const2r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3R", &yrecldp->rcl_const3r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4R", &yrecldp->rcl_const4r); + read_hdf5(file_id, "/YRECLDP_RCL_FAC1", &yrecldp->rcl_fac1); + read_hdf5(file_id, "/YRECLDP_RCL_FAC2", &yrecldp->rcl_fac2); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5R", &yrecldp->rcl_const5r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6R", &yrecldp->rcl_const6r); + read_hdf5(file_id, "/YRECLDP_RCL_FZRAB", &yrecldp->rcl_fzrab); + read_hdf5(file_id, "/YRECLDP_RCL_FZRBB", &yrecldp->rcl_fzrbb); + read_hdf5_int(file_id, "/YRECLDP_LCLDEXTRA", &yrecldp->lcldextra); // Bool + read_hdf5_int(file_id, "/YRECLDP_LCLDBUDGET", &yrecldp->lcldbudget); // Bool + read_hdf5_int(file_id, "/YRECLDP_NSSOPT", &yrecldp->nssopt); + read_hdf5_int(file_id, "/YRECLDP_NCLDTOP", &yrecldp->ncldtop); + read_hdf5_int(file_id, "/YRECLDP_NAECLBC", &yrecldp->naeclbc); + read_hdf5_int(file_id, "/YRECLDP_NAECLDU", &yrecldp->naecldu); + read_hdf5_int(file_id, "/YRECLDP_NAECLOM", &yrecldp->naeclom); + read_hdf5_int(file_id, "/YRECLDP_NAECLSS", &yrecldp->naeclss); + read_hdf5_int(file_id, "/YRECLDP_NAECLSU", &yrecldp->naeclsu); + read_hdf5_int(file_id, "/YRECLDP_NCLDDIAG", &yrecldp->nclddiag); + read_hdf5_int(file_id, "/YRECLDP_NAERCLD", &yrecldp->naercld); + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOLSP", &yrecldp->laerliqautolsp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCP", &yrecldp->laerliqautocp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCPB", &yrecldp->laerliqautocpb); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQCOLL", &yrecldp->laerliqcoll); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICESED", &yrecldp->laericesed); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICEAUTO", &yrecldp->laericeauto); // Bool + read_hdf5(file_id, "/YRECLDP_NSHAPEP", &yrecldp->nshapep); + read_hdf5(file_id, "/YRECLDP_NSHAPEQ", &yrecldp->nshapeq); + read_hdf5_int(file_id, "/YRECLDP_NBETA", &yrecldp->nbeta); + + status = H5Fclose(file_id); + +#endif + } + + /* Read reference result into memory */ void load_reference(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, @@ -364,13 +656,15 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "reference", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); load_and_expand_2d(serializer, savepoint, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); load_and_expand_1d(serializer, savepoint, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); @@ -395,4 +689,37 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(REFERENCE_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); + load_and_expand_1d(file_id, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); + load_and_expand_2d(file_id, "PFSQLF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqlf); + load_and_expand_2d(file_id, "PFSQIF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqif); + load_and_expand_2d(file_id, "PFCQLNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqlng); + load_and_expand_2d(file_id, "PFCQNNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqnng); + load_and_expand_2d(file_id, "PFSQRF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqrf); + load_and_expand_2d(file_id, "PFSQSF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqsf); + load_and_expand_2d(file_id, "PFCQRNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqrng); + load_and_expand_2d(file_id, "PFCQSNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqsng); + load_and_expand_2d(file_id, "PFSQLTUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqltur); + load_and_expand_2d(file_id, "PFSQITUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqitur); + load_and_expand_2d(file_id, "PFPLSL", nlon, nlev+1, nproma, ngptot, nblocks, pfplsl); + load_and_expand_2d(file_id, "PFPLSN", nlon, nlev+1, nproma, ngptot, nblocks, pfplsn); + load_and_expand_2d(file_id, "PFHPSL", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsl); + load_and_expand_2d(file_id, "PFHPSN", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsn); + load_and_expand_2d(file_id, "TENDENCY_LOC_T", nlon, nlev, nproma, ngptot, nblocks, tend_loc_t); + load_and_expand_2d(file_id, "TENDENCY_LOC_Q", nlon, nlev, nproma, ngptot, nblocks, tend_loc_q); + load_and_expand_2d(file_id, "TENDENCY_LOC_A", nlon, nlev, nproma, ngptot, nblocks, tend_loc_a); + load_and_expand_3d(file_id, "TENDENCY_LOC_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_loc_cld); + + status = H5Fclose(file_id); +#endif + } From ef04bd2c69561f50f6b29085d42240d251756061 Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Tue, 13 Feb 2024 17:55:43 +0200 Subject: [PATCH 159/174] Build HIP variants with HDF5 (in addition to Serialbox) --- src/cloudsc_hip/CMakeLists.txt | 44 ++- src/cloudsc_hip/cloudsc/cloudsc_validate.cpp | 22 +- src/cloudsc_hip/cloudsc/load_state.cpp | 371 +++++++++++++++++-- 3 files changed, 387 insertions(+), 50 deletions(-) diff --git a/src/cloudsc_hip/CMakeLists.txt b/src/cloudsc_hip/CMakeLists.txt index 0f04960e..3651d2bd 100644 --- a/src/cloudsc_hip/CMakeLists.txt +++ b/src/cloudsc_hip/CMakeLists.txt @@ -9,7 +9,7 @@ # Define this dwarf variant as an ECBuild feature ecbuild_add_option( FEATURE CLOUDSC_HIP DESCRIPTION "Build the HIP version CLOUDSC using Serialbox" DEFAULT ON - CONDITION Serialbox_FOUND AND HAVE_HIP + CONDITION (Serialbox_FOUND OR HDF5_FOUND) AND HAVE_HIP ) if( HAVE_CLOUDSC_HIP ) @@ -37,13 +37,16 @@ if( HAVE_CLOUDSC_HIP ) $ $ PUBLIC_LIBS - hip::device - Serialbox::Serialbox_C + hip::device + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) target_include_directories(dwarf-cloudsc-hip-lib PUBLIC $ $) - target_link_libraries(dwarf-cloudsc-hip-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) + target_link_libraries(dwarf-cloudsc-hip-lib PUBLIC hip::device $<${HAVE_HDF5}:hdf5::hdf5> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C>) if (NOT DEFINED CMAKE_HIP_ARCHITECTURES) message(WARNING "No HIP architecture is set! ('CMAKE_HIP_ARCHITECTURES' is not defined)") @@ -86,13 +89,16 @@ if( HAVE_CLOUDSC_HIP ) $ $ PUBLIC_LIBS - hip::device - Serialbox::Serialbox_C + hip::device + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) target_include_directories(dwarf-cloudsc-hip-hoist-lib PUBLIC $ $) - target_link_libraries(dwarf-cloudsc-hip-hoist-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) + target_link_libraries(dwarf-cloudsc-hip-hoist-lib PUBLIC hip::device $<${HAVE_HDF5}:hdf5::hdf5> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C>) if (NOT DEFINED CMAKE_HIP_ARCHITECTURES) message(WARNING "No HIP architecture is set! ('CMAKE_HIP_ARCHITECTURES' is not defined)") @@ -135,13 +141,16 @@ if( HAVE_CLOUDSC_HIP ) $ $ PUBLIC_LIBS - hip::device - Serialbox::Serialbox_C + hip::device + $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> + DEFINITIONS + ${CLOUDSC_DEFINITIONS} ) target_include_directories(dwarf-cloudsc-hip-k-caching-lib PUBLIC $ $) - target_link_libraries(dwarf-cloudsc-hip-k-caching-lib PUBLIC hip::device Serialbox::Serialbox_C $<${HAVE_OMP}:OpenMP::OpenMP_C>) + target_link_libraries(dwarf-cloudsc-hip-k-caching-lib PUBLIC hip::device $<${HAVE_HDF5}:hdf5::hdf5> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C>) if (NOT DEFINED CMAKE_HIP_ARCHITECTURES) message(WARNING "No HIP architecture is set! ('CMAKE_HIP_ARCHITECTURES' is not defined)") @@ -164,8 +173,17 @@ if( HAVE_CLOUDSC_HIP ) ) ## - # Create symlink for the input data - execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink - ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + # Create symlink for the input data + if( HAVE_SERIALBOX ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) + endif() + + if( HAVE_HDF5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) + execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/reference.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../reference.h5 ) + endif() endif() diff --git a/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp b/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp index 79566e43..ab81204d 100644 --- a/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp +++ b/src/cloudsc_hip/cloudsc/cloudsc_validate.cpp @@ -50,8 +50,6 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jk; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; - //double (*field)[nlon] = (double (*)[nlon]) v_field; - //double (*reference)[nlon] = (double (*)[nlon]) v_ref; zminval = +DBL_MAX; zmaxval = -DBL_MAX; @@ -59,8 +57,8 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i zerrsum = 0.0; zsum = 0.0; -// #pragma omp parallel for default(shared) private(b, bsize, jk) \ -// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + #pragma omp parallel for default(shared) private(b, bsize, jk) \ + reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jk = 0; jk < bsize; jk++) { @@ -84,8 +82,6 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jl, jk; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; -// double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field; -// double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref; zminval = +DBL_MAX; zmaxval = -DBL_MAX; @@ -93,9 +89,8 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int zerrsum = 0.0; zsum = 0.0; -// #pragma omp parallel for default(shared) private(b, bsize, jl, jk) \ -// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) - + #pragma omp parallel for default(shared) private(b, bsize, jl, jk) \ + reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jl = 0; jl < nlev; jl++) { @@ -111,7 +106,6 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int } } } - zavgpgp = zerrsum / (double) ngptot; print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); } @@ -123,8 +117,6 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jl, jk, jm; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; -// double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field; -// double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref; zminval = +DBL_MAX; zmaxval = -DBL_MAX; @@ -132,8 +124,8 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, zerrsum = 0.0; zsum = 0.0; -// #pragma omp parallel for default(shared) private(b, bsize, jl, jk, jm) \ -// reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) + #pragma omp parallel for default(shared) private(b, bsize, jl, jk, jm) \ + reduction(min:zminval) reduction(max:zmaxval,zmaxerr) reduction(+:zerrsum,zsum) for (b = 0; b < nblocks; b++) { bsize = min(nlon, ngptot - b*nlon); // field block size for (jm = 0; jm < nclv; jm++) { @@ -150,7 +142,7 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, } } } - } + } zavgpgp = zerrsum / (double) ngptot; print_error(name, zminval, zmaxval, zmaxerr, zerrsum, zsum, zavgpgp, 2); } diff --git a/src/cloudsc_hip/cloudsc/load_state.cpp b/src/cloudsc_hip/cloudsc/load_state.cpp index 034270ca..767fe257 100644 --- a/src/cloudsc_hip/cloudsc/load_state.cpp +++ b/src/cloudsc_hip/cloudsc/load_state.cpp @@ -9,17 +9,42 @@ */ #include "load_state.h" -#include #include +#ifdef HAVE_SERIALBOX #include "serialbox-c/Serialbox.h" +#endif +#ifdef HAVE_HDF5 +#include "hdf5.h" +#define INPUT_FILE "input.h5" +#define REFERENCE_FILE "reference.h5" +#endif #define min(a, b) (((a) < (b)) ? (a) : (b)) #define max(a, b) (((a) > (b)) ? (a) : (b)) +#ifdef HAVE_HDF5 +void read_hdf5_int(hid_t file_id, const char *name, int *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} + +void read_hdf5(hid_t file_id, const char *name, double *field) { + hid_t dataset_id; + herr_t status; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, field); + status = H5Dclose(dataset_id); +} +#endif + /* Query sizes and dimensions of state arrays */ void query_state(int *klon, int *klev) { +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* globalMetainfo = serialboxSerializerGetGlobalMetainfo(serializer); @@ -28,6 +53,17 @@ void query_state(int *klon, int *klev) serialboxMetainfoDestroy(globalMetainfo); serialboxSerializerDestroy(serializer); +#endif +#ifdef HAVE_HDF5 + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + read_hdf5_int(file_id, "/KLEV", klev); + read_hdf5_int(file_id, "/KLON", klon); + + status = H5Fclose(file_id); +#endif } void expand_1d(double *buffer, double *field_in, int nlon, int nproma, int ngptot, int nblocks) @@ -94,7 +130,7 @@ void expand_3d(double *buffer_in, double *field_in, int nlon, int nlev, int nclv } } - +#ifdef HAVE_SERIALBOX void load_and_expand_1d(serialboxSerializer_t *serializer, serialboxSavepoint_t* savepoint, const char *name, int nlon, int nproma, int ngptot, int nblocks, double *field) { @@ -134,32 +170,83 @@ void load_and_expand_3d(serialboxSerializer_t *serializer, serialboxSavepoint_t* serialboxSerializerRead(serializer, name, savepoint, buffer, strides, 3); expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); } +#endif +#if HAVE_HDF5 +void load_and_expand_1d(hid_t file_id, const char *name, int nlon, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nlon]; + int strides[1] = {1}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_1d((double *)buffer, field, nlon, nproma, ngptot, nblocks); +} -/* Read input state into memory */ -void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, - double* ptsphy, double* plcrit_aer, double* picrit_aer, - double* pre_ice, double* pccn, double* pnice, double* pt, double* pq, - double* tend_cml_t, double* tend_cml_q, double* tend_cml_a, double* tend_cml_cld, - double* tend_tmp_t, double* tend_tmp_q, double* tend_tmp_a, double* tend_tmp_cld, - double* pvfa, double* pvfl, double* pvfi, double* pdyna, double* pdynl, double* pdyni, - double* phrsw, double* phrlw, double* pvervel, double* pap, double* paph, double* plsm, - int* ktype, double* plu, double* plude, double* psnde, double* pmfu, - double* pmfd, double* pa, double* pclv, double* psupsat, struct TECLDP* yrecldp, - double* rg, double* rd, double* rcpd, double* retv, double* rlvtt, double* rlstt, - double* rlmlt, double* rtt, double* rv, double* r2es, double* r3les, double* r3ies, - double* r4les, double* r4ies, double* r5les, double* r5ies, double* r5alvcp, double* r5alscp, - double* ralvdcp, double* ralsdcp, double* ralfdcp, double* rtwat, - double* rtice, double* rticecu, double* rtwat_rtice_r, double *rtwat_rticecu_r, - double* rkoop1, double* rkoop2 ) +void load_and_expand_1d_int(hid_t file_id, const char *name, int nlon, int nproma, int ngptot, int nblocks, int *field) { + int buffer[nlon]; + int strides[1] = {1}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_1d_int((int *)buffer, field, nlon, nproma, ngptot, nblocks); +} + +void load_and_expand_2d(hid_t file_id, const char *name, int nlon, int nlev, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nlev][nlon]; + int strides[2] = {1, nlon}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_2d((double *)buffer, field, nlon, nlev, nproma, ngptot, nblocks); +} + +void load_and_expand_3d(hid_t file_id, const char *name, int nlon, int nlev, int nclv, int nproma, int ngptot, int nblocks, double *field) +{ + double buffer[nclv][nlev][nlon]; + int strides[3] = {1, nlon, nlev*nlon}; + hid_t dataset_id; + dataset_id = H5Dopen2(file_id, name, H5P_DEFAULT); + herr_t status; + status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, buffer); + status = H5Dclose(dataset_id); + expand_3d((double *)buffer, field, nlon, nlev, nclv, nproma, ngptot, nblocks); +} +#endif + +/* Read input state into memory */ +void load_state(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, + double* ptsphy, double* plcrit_aer, double* picrit_aer, + double* pre_ice, double* pccn, double* pnice, double* pt, double* pq, + double* tend_cml_t, double* tend_cml_q, double* tend_cml_a, double* tend_cml_cld, + double* tend_tmp_t, double* tend_tmp_q, double* tend_tmp_a, double* tend_tmp_cld, + double* pvfa, double* pvfl, double* pvfi, double* pdyna, double* pdynl, double* pdyni, + double* phrsw, double* phrlw, double* pvervel, double* pap, double* paph, double* plsm, + int* ktype, double* plu, double* plude, double* psnde, double* pmfu, + double* pmfd, double* pa, double* pclv, double* psupsat, struct TECLDP* yrecldp, + double* rg, double* rd, double* rcpd, double* retv, double* rlvtt, double* rlstt, + double* rlmlt, double* rtt, double* rv, double* r2es, double* r3les, double* r3ies, + double* r4les, double* r4ies, double* r5les, double* r5ies, double* r5alvcp, double* r5alscp, + double* ralvdcp, double* ralsdcp, double* ralfdcp, double* rtwat, + double* rtice, double* rticecu, double* rtwat_rtice_r, double *rtwat_rticecu_r, + double* rkoop1, double* rkoop2) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "input", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLCRIT_AER", nlon, nlev, nproma, ngptot, nblocks, plcrit_aer); load_and_expand_2d(serializer, savepoint, "PICRIT_AER", nlon, nlev, nproma, ngptot, nblocks, picrit_aer); load_and_expand_2d(serializer, savepoint, "PRE_ICE", nlon, nlev, nproma, ngptot, nblocks, pre_ice); @@ -355,8 +442,213 @@ void load_state(const int nlon, const int nlev, const int nclv, const int ngptot serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(INPUT_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLCRIT_AER", nlon, nlev, nproma, ngptot, nblocks, plcrit_aer); + load_and_expand_2d(file_id, "PICRIT_AER", nlon, nlev, nproma, ngptot, nblocks, picrit_aer); + load_and_expand_2d(file_id, "PRE_ICE", nlon, nlev, nproma, ngptot, nblocks, pre_ice); + load_and_expand_2d(file_id, "PCCN", nlon, nlev, nproma, ngptot, nblocks, pccn); + load_and_expand_2d(file_id, "PNICE", nlon, nlev, nproma, ngptot, nblocks, pnice); + load_and_expand_2d(file_id, "PT", nlon, nlev, nproma, ngptot, nblocks, pt); + load_and_expand_2d(file_id, "PQ", nlon, nlev, nproma, ngptot, nblocks, pq); + load_and_expand_2d(file_id, "TENDENCY_CML_T", nlon, nlev, nproma, ngptot, nblocks, tend_cml_t); + load_and_expand_2d(file_id, "TENDENCY_CML_Q", nlon, nlev, nproma, ngptot, nblocks, tend_cml_q); + load_and_expand_2d(file_id, "TENDENCY_CML_A", nlon, nlev, nproma, ngptot, nblocks, tend_cml_a); + load_and_expand_3d(file_id, "TENDENCY_CML_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_cml_cld); + load_and_expand_2d(file_id, "TENDENCY_TMP_T", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_t); + load_and_expand_2d(file_id, "TENDENCY_TMP_Q", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_q); + load_and_expand_2d(file_id, "TENDENCY_TMP_A", nlon, nlev, nproma, ngptot, nblocks, tend_tmp_a); + load_and_expand_3d(file_id, "TENDENCY_TMP_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_tmp_cld); + load_and_expand_2d(file_id, "PVFA", nlon, nlev, nproma, ngptot, nblocks, pvfa); + load_and_expand_2d(file_id, "PVFL", nlon, nlev, nproma, ngptot, nblocks, pvfl); + load_and_expand_2d(file_id, "PVFI", nlon, nlev, nproma, ngptot, nblocks, pvfi); + load_and_expand_2d(file_id, "PDYNA", nlon, nlev, nproma, ngptot, nblocks, pdyna); + load_and_expand_2d(file_id, "PDYNL", nlon, nlev, nproma, ngptot, nblocks, pdynl); + load_and_expand_2d(file_id, "PDYNI", nlon, nlev, nproma, ngptot, nblocks, pdyni); + load_and_expand_2d(file_id, "PHRSW", nlon, nlev, nproma, ngptot, nblocks, phrsw); + load_and_expand_2d(file_id, "PHRLW", nlon, nlev, nproma, ngptot, nblocks, phrlw); + load_and_expand_2d(file_id, "PVERVEL", nlon, nlev, nproma, ngptot, nblocks, pvervel); + load_and_expand_2d(file_id, "PAP", nlon, nlev, nproma, ngptot, nblocks, pap); + load_and_expand_2d(file_id, "PAPH", nlon, nlev+1, nproma, ngptot, nblocks, paph); + load_and_expand_1d(file_id, "PLSM", nlon, nproma, ngptot, nblocks, plsm); + load_and_expand_1d_int(file_id, "KTYPE", nlon, nproma, ngptot, nblocks, ktype); + load_and_expand_2d(file_id, "PLU", nlon, nlev, nproma, ngptot, nblocks, plu); + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PSNDE", nlon, nlev, nproma, ngptot, nblocks, psnde); + load_and_expand_2d(file_id, "PMFU", nlon, nlev, nproma, ngptot, nblocks, pmfu); + load_and_expand_2d(file_id, "PMFD", nlon, nlev, nproma, ngptot, nblocks, pmfd); + load_and_expand_2d(file_id, "PA", nlon, nlev, nproma, ngptot, nblocks, pa); + load_and_expand_3d(file_id, "PCLV", nlon, nlev, nclv, nproma, ngptot, nblocks, pclv); + load_and_expand_2d(file_id, "PSUPSAT", nlon, nlev, nproma, ngptot, nblocks, psupsat); + + read_hdf5(file_id, "/PTSPHY", ptsphy); + + read_hdf5(file_id, "/RG", rg); + read_hdf5(file_id, "/RD", rd); + read_hdf5(file_id, "/RCPD", rcpd); + read_hdf5(file_id, "/RETV", retv); + read_hdf5(file_id, "/RLVTT", rlvtt); + read_hdf5(file_id, "/RLSTT", rlstt); + read_hdf5(file_id, "/RLMLT", rlmlt); + read_hdf5(file_id, "/RTT", rtt); + read_hdf5(file_id, "/RV", rv); + read_hdf5(file_id, "/R2ES", r2es); + read_hdf5(file_id, "/R3LES", r3les); + read_hdf5(file_id, "/R3IES", r3ies); + read_hdf5(file_id, "/R4LES", r4les); + read_hdf5(file_id, "/R4IES", r4ies); + read_hdf5(file_id, "/R5LES", r5les); + read_hdf5(file_id, "/R5IES", r5ies); + read_hdf5(file_id, "/R5ALVCP", r5alvcp); + read_hdf5(file_id, "/R5ALSCP", r5alscp); + read_hdf5(file_id, "/RALVDCP", ralvdcp); + read_hdf5(file_id, "/RALSDCP", ralsdcp); + read_hdf5(file_id, "/RALFDCP", ralfdcp); + read_hdf5(file_id, "/RTWAT", rtwat); + read_hdf5(file_id, "/RTICE", rtice); + read_hdf5(file_id, "/RTICECU", rticecu); + read_hdf5(file_id, "/RTWAT_RTICE_R", rtwat_rtice_r); + read_hdf5(file_id, "/RTWAT_RTICECU_R", rtwat_rticecu_r); + read_hdf5(file_id, "/RKOOP1", rkoop1); + read_hdf5(file_id, "/RKOOP2", rkoop2); + + read_hdf5(file_id, "/YRECLDP_RAMID", &yrecldp->ramid); + read_hdf5(file_id, "/YRECLDP_RCLDIFF", &yrecldp->rcldiff); + read_hdf5(file_id, "/YRECLDP_RCLDIFF_CONVI", &yrecldp->rcldiff_convi); + read_hdf5(file_id, "/YRECLDP_RCLCRIT", &yrecldp->rclcrit); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_SEA", &yrecldp->rclcrit_sea); + read_hdf5(file_id, "/YRECLDP_RCLCRIT_LAND", &yrecldp->rclcrit_land); + read_hdf5(file_id, "/YRECLDP_RKCONV", &yrecldp->rkconv); + read_hdf5(file_id, "/YRECLDP_RPRC1", &yrecldp->rprc1); + read_hdf5(file_id, "/YRECLDP_RPRC2", &yrecldp->rprc2); + read_hdf5(file_id, "/YRECLDP_RCLDMAX", &yrecldp->rcldmax); + read_hdf5(file_id, "/YRECLDP_RPECONS", &yrecldp->rpecons); + read_hdf5(file_id, "/YRECLDP_RVRFACTOR", &yrecldp->rvrfactor); + read_hdf5(file_id, "/YRECLDP_RPRECRHMAX", &yrecldp->rprecrhmax); + read_hdf5(file_id, "/YRECLDP_RTAUMEL", &yrecldp->rtaumel); + read_hdf5(file_id, "/YRECLDP_RAMIN", &yrecldp->ramin); + read_hdf5(file_id, "/YRECLDP_RLMIN", &yrecldp->rlmin); + read_hdf5(file_id, "/YRECLDP_RKOOPTAU", &yrecldp->rkooptau); + read_hdf5(file_id, "/YRECLDP_RCLDTOPP", &yrecldp->rcldtopp); + read_hdf5(file_id, "/YRECLDP_RLCRITSNOW", &yrecldp->rlcritsnow); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN1", &yrecldp->rsnowlin1); + read_hdf5(file_id, "/YRECLDP_RSNOWLIN2", &yrecldp->rsnowlin2); + read_hdf5(file_id, "/YRECLDP_RICEHI1", &yrecldp->ricehi1); + read_hdf5(file_id, "/YRECLDP_RICEHI2", &yrecldp->ricehi2); + read_hdf5(file_id, "/YRECLDP_RICEINIT", &yrecldp->riceinit); + read_hdf5(file_id, "/YRECLDP_RVICE", &yrecldp->rvice); + read_hdf5(file_id, "/YRECLDP_RVRAIN", &yrecldp->rvrain); + read_hdf5(file_id, "/YRECLDP_RVSNOW", &yrecldp->rvsnow); + read_hdf5(file_id, "/YRECLDP_RTHOMO", &yrecldp->rthomo); + read_hdf5(file_id, "/YRECLDP_RCOVPMIN", &yrecldp->rcovpmin); + read_hdf5(file_id, "/YRECLDP_RCCN", &yrecldp->rccn); + read_hdf5(file_id, "/YRECLDP_RNICE", &yrecldp->rnice); + read_hdf5(file_id, "/YRECLDP_RCCNOM", &yrecldp->rccnom); + read_hdf5(file_id, "/YRECLDP_RCCNSS", &yrecldp->rccnss); + read_hdf5(file_id, "/YRECLDP_RCCNSU", &yrecldp->rccnsu); + read_hdf5(file_id, "/YRECLDP_RCLDTOPCF", &yrecldp->rcldtopcf); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFRATE", &yrecldp->rdepliqrefrate); + read_hdf5(file_id, "/YRECLDP_RDEPLIQREFDEPTH", &yrecldp->rdepliqrefdepth); + read_hdf5(file_id, "/YRECLDP_RCL_KKAac", &yrecldp->rcl_kkaac); + read_hdf5(file_id, "/YRECLDP_RCL_KKBac", &yrecldp->rcl_kkbac); + read_hdf5(file_id, "/YRECLDP_RCL_KKAau", &yrecldp->rcl_kkaau); + read_hdf5(file_id, "/YRECLDP_RCL_KKBauq", &yrecldp->rcl_kkbauq); + read_hdf5(file_id, "/YRECLDP_RCL_KKBaun", &yrecldp->rcl_kkbaun); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_sea", &yrecldp->rcl_kk_cloud_num_sea); + read_hdf5(file_id, "/YRECLDP_RCL_KK_cloud_num_land", &yrecldp->rcl_kk_cloud_num_land); + read_hdf5(file_id, "/YRECLDP_RCL_AI", &yrecldp->rcl_ai); + read_hdf5(file_id, "/YRECLDP_RCL_BI", &yrecldp->rcl_bi); + read_hdf5(file_id, "/YRECLDP_RCL_CI", &yrecldp->rcl_ci); + read_hdf5(file_id, "/YRECLDP_RCL_DI", &yrecldp->rcl_di); + read_hdf5(file_id, "/YRECLDP_RCL_X1I", &yrecldp->rcl_x1i); + read_hdf5(file_id, "/YRECLDP_RCL_X2I", &yrecldp->rcl_x2i); + read_hdf5(file_id, "/YRECLDP_RCL_X3I", &yrecldp->rcl_x3i); + read_hdf5(file_id, "/YRECLDP_RCL_X4I", &yrecldp->rcl_x4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1I", &yrecldp->rcl_const1i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2I", &yrecldp->rcl_const2i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3I", &yrecldp->rcl_const3i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4I", &yrecldp->rcl_const4i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5I", &yrecldp->rcl_const5i); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6I", &yrecldp->rcl_const6i); + read_hdf5(file_id, "/YRECLDP_RCL_APB1", &yrecldp->rcl_apb1); + read_hdf5(file_id, "/YRECLDP_RCL_APB2", &yrecldp->rcl_apb2); + read_hdf5(file_id, "/YRECLDP_RCL_APB3", &yrecldp->rcl_apb3); + read_hdf5(file_id, "/YRECLDP_RCL_AS", &yrecldp->rcl_as); + read_hdf5(file_id, "/YRECLDP_RCL_BS", &yrecldp->rcl_bs); + read_hdf5(file_id, "/YRECLDP_RCL_CS", &yrecldp->rcl_cs); + read_hdf5(file_id, "/YRECLDP_RCL_DS", &yrecldp->rcl_ds); + read_hdf5(file_id, "/YRECLDP_RCL_X1S", &yrecldp->rcl_x1s); + read_hdf5(file_id, "/YRECLDP_RCL_X2S", &yrecldp->rcl_x2s); + read_hdf5(file_id, "/YRECLDP_RCL_X3S", &yrecldp->rcl_x3s); + read_hdf5(file_id, "/YRECLDP_RCL_X4S", &yrecldp->rcl_x4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1S", &yrecldp->rcl_const1s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2S", &yrecldp->rcl_const2s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3S", &yrecldp->rcl_const3s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4S", &yrecldp->rcl_const4s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5S", &yrecldp->rcl_const5s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6S", &yrecldp->rcl_const6s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST7S", &yrecldp->rcl_const7s); + read_hdf5(file_id, "/YRECLDP_RCL_CONST8S", &yrecldp->rcl_const8s); + read_hdf5(file_id, "/YRECLDP_RDENSWAT", &yrecldp->rdenswat); + read_hdf5(file_id, "/YRECLDP_RDENSREF", &yrecldp->rdensref); + read_hdf5(file_id, "/YRECLDP_RCL_AR", &yrecldp->rcl_ar); + read_hdf5(file_id, "/YRECLDP_RCL_BR", &yrecldp->rcl_br); + read_hdf5(file_id, "/YRECLDP_RCL_CR", &yrecldp->rcl_cr); + read_hdf5(file_id, "/YRECLDP_RCL_DR", &yrecldp->rcl_dr); + read_hdf5(file_id, "/YRECLDP_RCL_X1R", &yrecldp->rcl_x1r); + read_hdf5(file_id, "/YRECLDP_RCL_X2R", &yrecldp->rcl_x2r); + read_hdf5(file_id, "/YRECLDP_RCL_X4R", &yrecldp->rcl_x4r); + read_hdf5(file_id, "/YRECLDP_RCL_KA273", &yrecldp->rcl_ka273); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM1", &yrecldp->rcl_cdenom1); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM2", &yrecldp->rcl_cdenom2); + read_hdf5(file_id, "/YRECLDP_RCL_CDENOM3", &yrecldp->rcl_cdenom3); + read_hdf5(file_id, "/YRECLDP_RCL_SCHMIDT", &yrecldp->rcl_schmidt); + read_hdf5(file_id, "/YRECLDP_RCL_DYNVISC", &yrecldp->rcl_dynvisc); + read_hdf5(file_id, "/YRECLDP_RCL_CONST1R", &yrecldp->rcl_const1r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST2R", &yrecldp->rcl_const2r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST3R", &yrecldp->rcl_const3r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST4R", &yrecldp->rcl_const4r); + read_hdf5(file_id, "/YRECLDP_RCL_FAC1", &yrecldp->rcl_fac1); + read_hdf5(file_id, "/YRECLDP_RCL_FAC2", &yrecldp->rcl_fac2); + read_hdf5(file_id, "/YRECLDP_RCL_CONST5R", &yrecldp->rcl_const5r); + read_hdf5(file_id, "/YRECLDP_RCL_CONST6R", &yrecldp->rcl_const6r); + read_hdf5(file_id, "/YRECLDP_RCL_FZRAB", &yrecldp->rcl_fzrab); + read_hdf5(file_id, "/YRECLDP_RCL_FZRBB", &yrecldp->rcl_fzrbb); + read_hdf5_int(file_id, "/YRECLDP_LCLDEXTRA", &yrecldp->lcldextra); // Bool + read_hdf5_int(file_id, "/YRECLDP_LCLDBUDGET", &yrecldp->lcldbudget); // Bool + read_hdf5_int(file_id, "/YRECLDP_NSSOPT", &yrecldp->nssopt); + read_hdf5_int(file_id, "/YRECLDP_NCLDTOP", &yrecldp->ncldtop); + read_hdf5_int(file_id, "/YRECLDP_NAECLBC", &yrecldp->naeclbc); + read_hdf5_int(file_id, "/YRECLDP_NAECLDU", &yrecldp->naecldu); + read_hdf5_int(file_id, "/YRECLDP_NAECLOM", &yrecldp->naeclom); + read_hdf5_int(file_id, "/YRECLDP_NAECLSS", &yrecldp->naeclss); + read_hdf5_int(file_id, "/YRECLDP_NAECLSU", &yrecldp->naeclsu); + read_hdf5_int(file_id, "/YRECLDP_NCLDDIAG", &yrecldp->nclddiag); + read_hdf5_int(file_id, "/YRECLDP_NAERCLD", &yrecldp->naercld); + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOLSP", &yrecldp->laerliqautolsp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCP", &yrecldp->laerliqautocp); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQAUTOCPB", &yrecldp->laerliqautocpb); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERLIQCOLL", &yrecldp->laerliqcoll); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICESED", &yrecldp->laericesed); // Bool + read_hdf5_int(file_id, "/YRECLDP_LAERICEAUTO", &yrecldp->laericeauto); // Bool + read_hdf5(file_id, "/YRECLDP_NSHAPEP", &yrecldp->nshapep); + read_hdf5(file_id, "/YRECLDP_NSHAPEQ", &yrecldp->nshapeq); + read_hdf5_int(file_id, "/YRECLDP_NBETA", &yrecldp->nbeta); + + status = H5Fclose(file_id); + +#endif + } + + /* Read reference result into memory */ void load_reference(const int nlon, const int nlev, const int nclv, const int ngptot, const int nproma, double *plude, double *pcovptot, double *prainfrac_toprfz, double *pfsqlf, double *pfsqif, @@ -364,13 +656,15 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng double *pfsqltur, double *pfsqitur, double *pfplsl, double *pfplsn, double *pfhpsl, double *pfhpsn, double *tend_loc_a, double *tend_loc_q, double *tend_loc_t, double *tend_loc_cld) { + + int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); + +#ifdef HAVE_SERIALBOX serialboxSerializer_t* serializer = serialboxSerializerCreate(Read, "./data", "reference", "Binary"); serialboxMetainfo_t* metainfo = serialboxSerializerGetGlobalMetainfo(serializer); serialboxSavepoint_t** savepoints = serialboxSerializerGetSavepointVector(serializer); serialboxSavepoint_t* savepoint = savepoints[0]; - int nblocks = (ngptot / nproma) + min(ngptot % nproma, 1); - load_and_expand_2d(serializer, savepoint, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); load_and_expand_2d(serializer, savepoint, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); load_and_expand_1d(serializer, savepoint, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); @@ -395,4 +689,37 @@ void load_reference(const int nlon, const int nlev, const int nclv, const int ng serialboxSerializerDestroySavepointVector(savepoints, 1); serialboxSerializerDestroy(serializer); +#endif + +#ifdef HAVE_HDF5 + + hid_t file_id, dataset_id; + herr_t status; + file_id = H5Fopen(REFERENCE_FILE, H5F_ACC_RDWR, H5P_DEFAULT); + + load_and_expand_2d(file_id, "PLUDE", nlon, nlev, nproma, ngptot, nblocks, plude); + load_and_expand_2d(file_id, "PCOVPTOT", nlon, nlev, nproma, ngptot, nblocks, pcovptot); + load_and_expand_1d(file_id, "PRAINFRAC_TOPRFZ", nlon, nproma, ngptot, nblocks, prainfrac_toprfz); + load_and_expand_2d(file_id, "PFSQLF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqlf); + load_and_expand_2d(file_id, "PFSQIF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqif); + load_and_expand_2d(file_id, "PFCQLNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqlng); + load_and_expand_2d(file_id, "PFCQNNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqnng); + load_and_expand_2d(file_id, "PFSQRF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqrf); + load_and_expand_2d(file_id, "PFSQSF", nlon, nlev+1, nproma, ngptot, nblocks, pfsqsf); + load_and_expand_2d(file_id, "PFCQRNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqrng); + load_and_expand_2d(file_id, "PFCQSNG", nlon, nlev+1, nproma, ngptot, nblocks, pfcqsng); + load_and_expand_2d(file_id, "PFSQLTUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqltur); + load_and_expand_2d(file_id, "PFSQITUR", nlon, nlev+1, nproma, ngptot, nblocks, pfsqitur); + load_and_expand_2d(file_id, "PFPLSL", nlon, nlev+1, nproma, ngptot, nblocks, pfplsl); + load_and_expand_2d(file_id, "PFPLSN", nlon, nlev+1, nproma, ngptot, nblocks, pfplsn); + load_and_expand_2d(file_id, "PFHPSL", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsl); + load_and_expand_2d(file_id, "PFHPSN", nlon, nlev+1, nproma, ngptot, nblocks, pfhpsn); + load_and_expand_2d(file_id, "TENDENCY_LOC_T", nlon, nlev, nproma, ngptot, nblocks, tend_loc_t); + load_and_expand_2d(file_id, "TENDENCY_LOC_Q", nlon, nlev, nproma, ngptot, nblocks, tend_loc_q); + load_and_expand_2d(file_id, "TENDENCY_LOC_A", nlon, nlev, nproma, ngptot, nblocks, tend_loc_a); + load_and_expand_3d(file_id, "TENDENCY_LOC_CLD", nlon, nlev, nclv, nproma, ngptot, nblocks, tend_loc_cld); + + status = H5Fclose(file_id); +#endif + } From f5abe8aac2b762d2d86d30c66b461000c4d474bd Mon Sep 17 00:00:00 2001 From: MichaelSt98 Date: Tue, 13 Feb 2024 17:57:12 +0200 Subject: [PATCH 160/174] CUDA validate: remove unused local variables --- src/cloudsc_cuda/cloudsc/cloudsc_validate.cu | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu b/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu index 8998115e..ab81204d 100644 --- a/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu +++ b/src/cloudsc_cuda/cloudsc/cloudsc_validate.cu @@ -50,8 +50,6 @@ void validate_1d(const char *name, double * v_ref, double * v_field, int nlon, i /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jk; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; - double (*field)[nlon] = (double (*)[nlon]) v_field; - double (*reference)[nlon] = (double (*)[nlon]) v_ref; zminval = +DBL_MAX; zmaxval = -DBL_MAX; @@ -84,8 +82,6 @@ void validate_2d(const char *name, double *v_ref, double *v_field, int nlon, int /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jl, jk; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; - double (*field)[nlev][nlon] = (double (*)[nlev][nlon]) v_field; - double (*reference)[nlev][nlon] = (double (*)[nlev][nlon]) v_ref; zminval = +DBL_MAX; zmaxval = -DBL_MAX; @@ -121,8 +117,6 @@ void validate_3d(const char *name, double *v_ref, double *v_field, int nlon, /* Computes and prints errors in the "L2 norm sense" */ int b, bsize, jl, jk, jm; double zminval, zmaxval, zdiff, zmaxerr, zerrsum, zsum, zrelerr, zavgpgp; - double (*field)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_field; - double (*reference)[nclv][nlev][nlon] = (double (*)[nclv][nlev][nlon]) v_ref; zminval = +DBL_MAX; zmaxval = -DBL_MAX; From e7e614cd6975a8612876d3f59bfcee2765e68855 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 20 Feb 2024 00:54:07 +0100 Subject: [PATCH 161/174] Provide consistent CPP definition to loki_transform --- src/cloudsc_loki/CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/cloudsc_loki/CMakeLists.txt b/src/cloudsc_loki/CMakeLists.txt index 94f339e2..1493bb7a 100644 --- a/src/cloudsc_loki/CMakeLists.txt +++ b/src/cloudsc_loki/CMakeLists.txt @@ -33,7 +33,7 @@ if( HAVE_CLOUDSC_LOKI ) # OFP frontend cannot deal with statement functions, so we toggle them here set( CLOUDSC_DEFINE_STMT_FUNC "" ) if(NOT "${LOKI_FRONTEND}" STREQUAL "ofp") - set( CLOUDSC_DEFINE_STMT_FUNC CLOUDSC_STMT_FUNC ) + set( CLOUDSC_DEFINE_STMT_FUNC CLOUDSC_STMT_FUNC ) endif() #################################################### @@ -54,6 +54,8 @@ if( HAVE_CLOUDSC_LOKI ) MODE idem CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config CPP + DEFINITIONS + ${CLOUDSC_DEFINE_STMT_FUNC} FRONTEND ${LOKI_FRONTEND} SOURCES ${CMAKE_CURRENT_SOURCE_DIR} @@ -127,6 +129,8 @@ if( HAVE_CLOUDSC_LOKI ) MODE idem-stack CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config CPP + DEFINITIONS + ${CLOUDSC_DEFINE_STMT_FUNC} FRONTEND ${LOKI_FRONTEND} SOURCES ${CMAKE_CURRENT_SOURCE_DIR} @@ -207,6 +211,8 @@ if( HAVE_CLOUDSC_LOKI ) MODE sca CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config CPP + DEFINITIONS + ${CLOUDSC_DEFINE_STMT_FUNC} FRONTEND ${LOKI_FRONTEND} SOURCES ${CMAKE_CURRENT_SOURCE_DIR} @@ -265,6 +271,8 @@ if( HAVE_CLOUDSC_LOKI ) MODE claw CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/cloudsc_loki.config CPP + DEFINITIONS + ${CLOUDSC_DEFINE_STMT_FUNC} FRONTEND ${LOKI_FRONTEND} SOURCES ${CMAKE_CURRENT_SOURCE_DIR} From 4b1a18686ce887a386070e5f9c75c5357b7139b1 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 20 Feb 2024 10:16:29 +0200 Subject: [PATCH 162/174] Specify HIP arch for LUMI toolchains --- arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake | 4 ++++ arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake | 2 +- arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake | 7 +++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake index 0774cf51..f5da607a 100644 --- a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake @@ -40,3 +40,7 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -hbyteswapio") set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") + +if(NOT DEFINED CMAKE_HIP_ARCHITECTURES) + set(CMAKE_HIP_ARCHITECTURES gfx90a) +endif() diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake index e7e382b2..191d3fe9 100644 --- a/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/toolchain.cmake @@ -41,7 +41,7 @@ set( OpenACC_Fortran_FLAGS "-hacc -h acc_model=deep_copy" ) #################################################################### set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -03 -ffast-math") -if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) +if(NOT DEFINED CMAKE_HIP_ARCHITECTURES) set(CMAKE_HIP_ARCHITECTURES gfx90a) endif() diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake index 272ab042..3957f68b 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake @@ -44,6 +44,9 @@ set(ECBUILD_Fortran_FLAGS "${ECBUILD_Fortran_FLAGS} -Wl, --as-needed") set(ECBUILD_Fortran_FLAGS_BIT "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG") -set( GPU_TARGETS "gfx90a" CACHE STRING "" ) -# select OpenMP pragma to be used +if(NOT DEFINED CMAKE_HIP_ARCHITECTURES) + set(CMAKE_HIP_ARCHITECTURES gfx90a) +endif() + +# select OpenMP pragma to be used set( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "" ) From d95b1937bbfc8f337467d3629defa61475265f85 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 20 Feb 2024 10:17:02 +0200 Subject: [PATCH 163/174] Inspect hdf5 module instead of loading it to overcome Cray module nonsense --- arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh | 20 +++++++++++++++++--- arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh | 20 +++++++++++++++++--- arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh | 20 +++++++++++++++++--- src/cloudsc_hip/CMakeLists.txt | 14 +++++++------- 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh index 2cc0f9b2..1d38cf63 100644 --- a/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh +++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/env.sh @@ -34,15 +34,29 @@ module_load cray-mpich/8.1.18 module_load craype/2.7.17 module_load craype-accel-amd-gfx90a module_load buildtools/22.08 -module_load cray-hdf5/1.12.1.5 module_load cray-python/3.9.12.1 +### Handling of "magic" cray modules +# 1) Load the cray modules +module_load cray-hdf5/1.12.1.5 +# 2) Store variables to locate the packages +_HDF5_ROOT=${CRAY_HDF5_PREFIX} +# 3) Unload the cray modules in reverse order, removing all the magic +module_unload cray-hdf5 +# 4) Define variables that CMake introspects +export HDF5_ROOT=${_HDF5_ROOT} + +# Export environment variable3s +export MPI_HOME=${MPICH_DIR} +export CC=cc +export CXX=CC +export FC=ftn +export HIPCXX=$(hipconfig --hipclangpath)/clang++ + module list set -x -export CC=cc CXX=CC FC=ftn - # Restore tracing to stored setting { if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null diff --git a/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh index 9a66e150..cd2ceff6 100644 --- a/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh +++ b/arch/eurohpc/lumi/cray-gpu/15.0.1/env.sh @@ -34,17 +34,31 @@ module_load cray-mpich/8.1.18 module_load craype/2.7.20 module_load craype-accel-amd-gfx90a module_load buildtools/23.03 -module_load cray-hdf5/1.12.1.5 module_load cray-python/3.9.12.1 module_load Boost/1.81.0-cpeCray-23.03 module_load partition/G +### Handling of "magic" cray modules +# 1) Load the cray modules +module_load cray-hdf5/1.12.1.5 +# 2) Store variables to locate the packages +_HDF5_ROOT=${CRAY_HDF5_PREFIX} +# 3) Unload the cray modules in reverse order, removing all the magic +module_unload cray-hdf5 +# 4) Define variables that CMake introspects +export HDF5_ROOT=${_HDF5_ROOT} + +# Export environment variable3s +export MPI_HOME=${MPICH_DIR} +export CC=cc +export CXX=CC +export FC=ftn +export HIPCXX=$(hipconfig --hipclangpath)/clang++ + module list set -x -export CC=cc CXX=CC FC=ftn - # Restore tracing to stored setting { if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null diff --git a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh index 663721c8..ae68ca01 100644 --- a/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh +++ b/arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh @@ -34,16 +34,30 @@ module_load rocm/5.2.3 module_load buildtools/23.09 module_load Boost/1.82.0-cpeCray-23.09 module_load cray-python/3.10.10 -module_load cray-hdf5/1.12.2.7 module_load craype-x86-trento module_load craype-accel-amd-gfx90a +### Handling of "magic" cray modules +# 1) Load the cray modules +module_load cray-hdf5/1.12.2.7 +# 2) Store variables to locate the packages +_HDF5_ROOT=${CRAY_HDF5_PREFIX} +# 3) Unload the cray modules in reverse order, removing all the magic +module_unload cray-hdf5 +# 4) Define variables that CMake introspects +export HDF5_ROOT=${_HDF5_ROOT} + +# Export environment variable3s +export MPI_HOME=${MPICH_DIR} +export CC=cc +export CXX=CC +export FC=ftn +export HIPCXX=$(hipconfig --hipclangpath)/clang++ + module list set -x -export CC=cc CXX=CC FC=ftn - # Restore tracing to stored setting { if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null diff --git a/src/cloudsc_hip/CMakeLists.txt b/src/cloudsc_hip/CMakeLists.txt index 3651d2bd..20fb57a0 100644 --- a/src/cloudsc_hip/CMakeLists.txt +++ b/src/cloudsc_hip/CMakeLists.txt @@ -38,7 +38,7 @@ if( HAVE_CLOUDSC_HIP ) $ PUBLIC_LIBS hip::device - $<${HAVE_HDF5}:hdf5::hdf5> + $<${HAVE_HDF5}:hdf5::hdf5> $<${HAVE_SERIALBOX}:Serialbox::Serialbox_C> $<${HAVE_OMP}:OpenMP::OpenMP_C> DEFINITIONS @@ -53,7 +53,7 @@ if( HAVE_CLOUDSC_HIP ) else() target_compile_options(dwarf-cloudsc-hip-lib PRIVATE --offload-arch=${CMAKE_HIP_ARCHITECTURES}) endif() - + ecbuild_add_executable( TARGET dwarf-cloudsc-hip SOURCES dwarf_cloudsc.cpp @@ -67,7 +67,7 @@ if( HAVE_CLOUDSC_HIP ) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 ) - ## + ## ###### SCC-HOIST-HIP #### ecbuild_add_library( @@ -119,7 +119,7 @@ if( HAVE_CLOUDSC_HIP ) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 ) - ## + ## ###### SCC-K-CACHING-HIP #### ecbuild_add_library( @@ -171,14 +171,14 @@ if( HAVE_CLOUDSC_HIP ) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/../../.. OMP 1 ) - ## - + ## + # Create symlink for the input data if( HAVE_SERIALBOX ) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/../../data ${CMAKE_CURRENT_BINARY_DIR}/../../../data ) endif() - + if( HAVE_HDF5 ) execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/../../config-files/input.h5 ${CMAKE_CURRENT_BINARY_DIR}/../../../input.h5 ) From 1528699baea88f87e19f5aa78359c2b1063c0bb6 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Tue, 20 Feb 2024 10:28:08 +0200 Subject: [PATCH 164/174] Fix OpenMP on Cray 14.0.2 in LUMI --- arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake index f5da607a..1547aa29 100644 --- a/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake +++ b/arch/eurohpc/lumi/cray-gpu/14.0.2/toolchain.cmake @@ -18,8 +18,13 @@ set( ENABLE_USE_STMT_FUNC ON CACHE STRING "" ) #################################################################### set( ENABLE_OMP ON CACHE STRING "" ) -set( OpenMP_C_FLAGS "-homp" CACHE STRING "" ) -set( OpenMP_Fortran_FLAGS "-homp" CACHE STRING "" ) +set( OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" ) +set( OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" ) + +set( OpenMP_C_LIB_NAMES "craymp" ) +set( OpenMP_CXX_LIB_NAMES "craymp" ) +set( OpenMP_Fortran_LIB_NAMES "craymp" ) +set( OpenMP_craymp_LIBRARY "/opt/cray/pe/cce/14.0.2/cce/x86_64/lib/libcraymp.so" ) #################################################################### # OpenACC FLAGS From 1f1d221879672e1bb879c8ff4e0f564b742e7f03 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Wed, 28 Feb 2024 17:53:06 +0100 Subject: [PATCH 165/174] Overwrite frontend for header modules --- src/cloudsc_loki/cloudsc_loki.config | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/cloudsc_loki/cloudsc_loki.config b/src/cloudsc_loki/cloudsc_loki.config index d85e2096..92f9be59 100644 --- a/src/cloudsc_loki/cloudsc_loki.config +++ b/src/cloudsc_loki/cloudsc_loki.config @@ -35,3 +35,16 @@ block = ['parkind1', 'yomphyder', 'yoecldp', 'fc*_mod'] [dimensions.block_dim] size = 'NGPBLKS' index = 'IBL' + + +# Overwrite frontend for header modules that cannot be parsed via OMNI +[frontend_args] + +[frontend_args."yomphyder.F90"] +frontend = 'FP' + +[frontend_args."yomcst.F90"] +frontend = 'FP' + +[frontend_args."yoethf.F90"] +frontend = 'FP' From f05ceac51fd5b01689b4101fee5b8f426d1cf919 Mon Sep 17 00:00:00 2001 From: Antoine Morvan Date: Tue, 19 Mar 2024 16:54:59 +0100 Subject: [PATCH 166/174] Add semantic for negative thread count Zero or negative values given as first argument of the various prototypes led to errors. This commit adds semantic to 0 or negative values by enabling the program to read the OMP_NUM_THREAD value from the environment, or using the number of procs availables if the variable is not defined. --- src/cloudsc_c/dwarf_cloudsc.c | 3 +++ src/cloudsc_cuda/dwarf_cloudsc.cpp | 3 +++ src/cloudsc_fortran/dwarf_cloudsc.F90 | 5 +++++ src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 | 5 +++++ src/cloudsc_loki/dwarf_cloudsc.F90 | 4 ++++ 5 files changed, 20 insertions(+) diff --git a/src/cloudsc_c/dwarf_cloudsc.c b/src/cloudsc_c/dwarf_cloudsc.c index 43b91b84..a6b8bef4 100644 --- a/src/cloudsc_c/dwarf_cloudsc.c +++ b/src/cloudsc_c/dwarf_cloudsc.c @@ -33,6 +33,9 @@ int main( int argc, char *argv[] ) { omp_threads = atoi( argv[1] ); ngptot = atoi( argv[2] ); nproma = atoi( argv[3] ); + if (omp_threads <= 0) { + omp_threads = omp_get_max_threads(); + } cloudsc_driver(omp_threads, ngptot, nproma); } else { diff --git a/src/cloudsc_cuda/dwarf_cloudsc.cpp b/src/cloudsc_cuda/dwarf_cloudsc.cpp index 43b91b84..a6b8bef4 100644 --- a/src/cloudsc_cuda/dwarf_cloudsc.cpp +++ b/src/cloudsc_cuda/dwarf_cloudsc.cpp @@ -33,6 +33,9 @@ int main( int argc, char *argv[] ) { omp_threads = atoi( argv[1] ); ngptot = atoi( argv[2] ); nproma = atoi( argv[3] ); + if (omp_threads <= 0) { + omp_threads = omp_get_max_threads(); + } cloudsc_driver(omp_threads, ngptot, nproma); } else { diff --git a/src/cloudsc_fortran/dwarf_cloudsc.F90 b/src/cloudsc_fortran/dwarf_cloudsc.F90 index c67f8de7..790c55a6 100644 --- a/src/cloudsc_fortran/dwarf_cloudsc.F90 +++ b/src/cloudsc_fortran/dwarf_cloudsc.F90 @@ -19,6 +19,8 @@ PROGRAM DWARF_CLOUDSC USE YOMCST , ONLY : YRCST USE YOETHF , ONLY : YRTHF +USE OMP_LIB + IMPLICIT NONE CHARACTER(LEN=20) :: CLARG @@ -47,6 +49,9 @@ PROGRAM DWARF_CLOUDSC if (IARGS >= 1) then CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) READ(CLARG(1:LENARG),*) NUMOMP + if (NUMOMP <= 0) then + NUMOMP = OMP_GET_MAX_THREADS() + end if end if ! Initialize MPI environment diff --git a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 index bef2a13b..75b789ff 100644 --- a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 +++ b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 @@ -42,6 +42,8 @@ PROGRAM DWARF_CLOUDSC USE CLOUDSC_DRIVER_GPU_SCC_FIELD_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_FIELD #endif +USE OMP_LIB + IMPLICIT NONE CHARACTER(LEN=20) :: CLARG @@ -69,6 +71,9 @@ PROGRAM DWARF_CLOUDSC if (IARGS >= 1) then CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) READ(CLARG(1:LENARG),*) NUMOMP + if (NUMOMP <= 0) then + NUMOMP = OMP_GET_MAX_THREADS() + end if end if ! Initialize MPI environment diff --git a/src/cloudsc_loki/dwarf_cloudsc.F90 b/src/cloudsc_loki/dwarf_cloudsc.F90 index 71f6b576..920ae524 100644 --- a/src/cloudsc_loki/dwarf_cloudsc.F90 +++ b/src/cloudsc_loki/dwarf_cloudsc.F90 @@ -18,6 +18,7 @@ PROGRAM DWARF_CLOUDSC USE CLOUDSC_DRIVER_LOKI_MOD, ONLY: CLOUDSC_DRIVER #endif USE EC_PMON_MOD, ONLY: EC_PMON +USE OMP_LIB IMPLICIT NONE @@ -46,6 +47,9 @@ PROGRAM DWARF_CLOUDSC if (IARGS >= 1) then CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) READ(CLARG(1:LENARG),*) NUMOMP + if (NUMOMP <= 0) then + NUMOMP = OMP_GET_MAX_THREADS() + end if end if ! Initialize MPI environment From c79a56c0c631d7747cd7834a590e1bc6a6eeebc4 Mon Sep 17 00:00:00 2001 From: Antoine Morvan Date: Tue, 19 Mar 2024 18:45:59 +0100 Subject: [PATCH 167/174] (Doc) Update readme Add first argument values and the associated effect on the multithreaded behavior --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index a9e9d59b..3f1f2a82 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,9 @@ The different prototype variants of the dwarf create different binaries that all behave similarly. The basic three arguments define (in this order): - Number of OpenMP threads + - 1 : single thread mode, skip multithread MPI init; default value; + - 2 or higher : force OpenMP thread count, enables multithread MPI; + - 0 or negative : read OMP_NUM_THREADS variable if present or defaults to CPU count (`omp_get_max_threads()`); - Size of overall working set in columns - Block size (NPROMA) in columns From cb45ce8450a245986722d86daac49554fa3c0140 Mon Sep 17 00:00:00 2001 From: Michael Lange Date: Thu, 21 Mar 2024 11:58:29 +0000 Subject: [PATCH 168/174] Loki: Adjust CUDA-speific loki config file to Loki v0.2.0 --- src/cloudsc_loki/cloudsc_cuf_loki.config | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cloudsc_loki/cloudsc_cuf_loki.config b/src/cloudsc_loki/cloudsc_cuf_loki.config index 8150ed9e..dd2470b8 100644 --- a/src/cloudsc_loki/cloudsc_cuf_loki.config +++ b/src/cloudsc_loki/cloudsc_cuf_loki.config @@ -6,11 +6,13 @@ strict = true # Throw exceptions during dicovery # Ensure that we are never adding these to the tree, and thus # do not attempt to look up the source files for these. -disable = ['timer%start', 'timer%end', 'timer%thread_start', 'timer%thread_end', - 'timer%thread_log', 'timer%thread_log', 'timer%print_performance', - 'performance_timer%start', 'performance_timer%end', 'performance_timer%thread_start', - 'performance_timer%thread_end', 'performance_timer%thread_log', - 'performance_timer%thread_log', 'performance_timer%print_performance'] +disable = [ + 'timer_mod', 'abort', 'file_io_mod', 'foe*', 'fokoop', + 'ceiling', 'dim3', 'cuda*' +] + +# Prune the tree for these to ensure they are not processed by transformations +block = ['parkind1', 'yomphyder', 'yoecldp', 'fc*_mod'] # Define entry point for call-tree transformation [routines] From b4102edab4e059d94a365255b05f391f382cc581 Mon Sep 17 00:00:00 2001 From: Antoine Morvan Date: Thu, 21 Mar 2024 14:41:51 +0100 Subject: [PATCH 169/174] Fix support for OpenMP-less build --- src/cloudsc_c/dwarf_cloudsc.c | 5 +++++ src/cloudsc_cuda/dwarf_cloudsc.cpp | 5 +++++ src/cloudsc_fortran/dwarf_cloudsc.F90 | 11 +++++++++-- src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 | 17 ++++++++++++----- src/cloudsc_loki/dwarf_cloudsc.F90 | 18 +++++++++++++----- 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/src/cloudsc_c/dwarf_cloudsc.c b/src/cloudsc_c/dwarf_cloudsc.c index a6b8bef4..5a171a59 100644 --- a/src/cloudsc_c/dwarf_cloudsc.c +++ b/src/cloudsc_c/dwarf_cloudsc.c @@ -34,7 +34,12 @@ int main( int argc, char *argv[] ) { ngptot = atoi( argv[2] ); nproma = atoi( argv[3] ); if (omp_threads <= 0) { +#ifdef _OPENMP omp_threads = omp_get_max_threads(); +#else + // if arg is 0 or negative, and OpenMP disabled; defaults to 1 + omp_threads = 1; +#endif } cloudsc_driver(omp_threads, ngptot, nproma); } diff --git a/src/cloudsc_cuda/dwarf_cloudsc.cpp b/src/cloudsc_cuda/dwarf_cloudsc.cpp index a6b8bef4..5a171a59 100644 --- a/src/cloudsc_cuda/dwarf_cloudsc.cpp +++ b/src/cloudsc_cuda/dwarf_cloudsc.cpp @@ -34,7 +34,12 @@ int main( int argc, char *argv[] ) { ngptot = atoi( argv[2] ); nproma = atoi( argv[3] ); if (omp_threads <= 0) { +#ifdef _OPENMP omp_threads = omp_get_max_threads(); +#else + // if arg is 0 or negative, and OpenMP disabled; defaults to 1 + omp_threads = 1; +#endif } cloudsc_driver(omp_threads, ngptot, nproma); } diff --git a/src/cloudsc_fortran/dwarf_cloudsc.F90 b/src/cloudsc_fortran/dwarf_cloudsc.F90 index 790c55a6..6418d4f0 100644 --- a/src/cloudsc_fortran/dwarf_cloudsc.F90 +++ b/src/cloudsc_fortran/dwarf_cloudsc.F90 @@ -19,7 +19,9 @@ PROGRAM DWARF_CLOUDSC USE YOMCST , ONLY : YRCST USE YOETHF , ONLY : YRTHF +#ifdef _OPENMP USE OMP_LIB +#endif IMPLICIT NONE @@ -47,10 +49,15 @@ PROGRAM DWARF_CLOUDSC ! Get the number of OpenMP threads to use for the benchmark if (IARGS >= 1) then - CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) - READ(CLARG(1:LENARG),*) NUMOMP + CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NUMOMP if (NUMOMP <= 0) then +#ifdef _OPENMP NUMOMP = OMP_GET_MAX_THREADS() +#else + ! if arg is 0 or negative, and OpenMP disabled; defaults to 1 + NUMOMP = 1 +#endif end if end if diff --git a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 index 75b789ff..b823260a 100644 --- a/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 +++ b/src/cloudsc_gpu/dwarf_cloudsc_gpu.F90 @@ -42,7 +42,9 @@ PROGRAM DWARF_CLOUDSC USE CLOUDSC_DRIVER_GPU_SCC_FIELD_MOD, ONLY: CLOUDSC_DRIVER_GPU_SCC_FIELD #endif +#ifdef _OPENMP USE OMP_LIB +#endif IMPLICIT NONE @@ -69,11 +71,16 @@ PROGRAM DWARF_CLOUDSC ! Get the number of OpenMP threads to use for the benchmark if (IARGS >= 1) then - CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) - READ(CLARG(1:LENARG),*) NUMOMP - if (NUMOMP <= 0) then - NUMOMP = OMP_GET_MAX_THREADS() - end if + CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NUMOMP + if (NUMOMP <= 0) then +#ifdef _OPENMP + NUMOMP = OMP_GET_MAX_THREADS() +#else + ! if arg is 0 or negative, and OpenMP disabled; defaults to 1 + NUMOMP = 1 +#endif + end if end if ! Initialize MPI environment diff --git a/src/cloudsc_loki/dwarf_cloudsc.F90 b/src/cloudsc_loki/dwarf_cloudsc.F90 index 920ae524..67857a78 100644 --- a/src/cloudsc_loki/dwarf_cloudsc.F90 +++ b/src/cloudsc_loki/dwarf_cloudsc.F90 @@ -18,7 +18,10 @@ PROGRAM DWARF_CLOUDSC USE CLOUDSC_DRIVER_LOKI_MOD, ONLY: CLOUDSC_DRIVER #endif USE EC_PMON_MOD, ONLY: EC_PMON + +#ifdef _OPENMP USE OMP_LIB +#endif IMPLICIT NONE @@ -45,11 +48,16 @@ PROGRAM DWARF_CLOUDSC ! Get the number of OpenMP threads to use for the benchmark if (IARGS >= 1) then - CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) - READ(CLARG(1:LENARG),*) NUMOMP - if (NUMOMP <= 0) then - NUMOMP = OMP_GET_MAX_THREADS() - end if + CALL GET_COMMAND_ARGUMENT(1, CLARG, LENARG) + READ(CLARG(1:LENARG),*) NUMOMP + if (NUMOMP <= 0) then +#ifdef _OPENMP + NUMOMP = OMP_GET_MAX_THREADS() +#else + ! if arg is 0 or negative, and OpenMP disabled; defaults to 1 + NUMOMP = 1 +#endif + end if end if ! Initialize MPI environment From 1feb2307ed2e607591dd8c1ffb2b8a1b178cbed1 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Mar 2024 12:25:11 +0100 Subject: [PATCH 170/174] Update Version of actions to remove node.js warnings --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3260f6b..7647451c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -95,7 +95,7 @@ jobs: # Steps represent a sequence of tasks that will be executed as part of the job steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Installs required packages - name: Package installation @@ -177,7 +177,7 @@ jobs: # Upload test output - name: Archive CTest output - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: ${{ !contains(matrix.build_flags, '--single-precision') }} with: name: ctest-log From 33dac54a18f7d24e0924379a3d5c9a1107d11211 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Mar 2024 12:36:32 +0100 Subject: [PATCH 171/174] Remove ctest log archiving as artifact --- .github/workflows/build.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7647451c..0bf31421 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -174,11 +174,3 @@ jobs: run: | source env.sh ctest -O ctest.log --output-on-failure -E "${{ matrix.ctest_exclude_pattern }}" - - # Upload test output - - name: Archive CTest output - uses: actions/upload-artifact@v4 - if: ${{ !contains(matrix.build_flags, '--single-precision') }} - with: - name: ctest-log - path: build/ctest.log From 4ccf5d3175c87a028773ab65f0da2b0c49df1dc8 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Mar 2024 13:35:04 +0100 Subject: [PATCH 172/174] Update loki version to 0.2.0 and field_api to 0.3.0 --- bundle.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bundle.yml b/bundle.yml index 98fe9467..8049c540 100644 --- a/bundle.yml +++ b/bundle.yml @@ -35,7 +35,7 @@ projects : - loki : git : https://github.com/ecmwf-ifs/loki - version : main + version : v0.2.0 optional: true require : ecbuild cmake : > @@ -54,7 +54,7 @@ projects : - field_api : git : https://github.com/ecmwf-ifs/field_api.git - version : main + version : 0.3.0 optional: true require : ecbuild cmake : > From f485cb061505efb255b8f156f91a595ae6f3e06e Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Mar 2024 17:09:07 +0100 Subject: [PATCH 173/174] Add new authors --- AUTHORS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AUTHORS.md b/AUTHORS.md index 650e42ac..e52d1ddc 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -13,7 +13,9 @@ - L. Lucido (Atos) - O. Marsden (ECMWF) - G. Mengaldo (ECMWF) +- A. Morvan (Atos) - G. Mozdzynski (ECMWF) +- A. Nawab (ECMWF) - Z. Piotrowski (ECMWF) - B. Reuter (ECMWF) - D. Salmond (ECMWF) From b2b2a887f43463a1b72cc1dd71254a2f7b1dd5f0 Mon Sep 17 00:00:00 2001 From: Balthasar Reuter Date: Fri, 22 Mar 2024 17:09:58 +0100 Subject: [PATCH 174/174] Update version to 1.5.0 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 88c5fb89..bc80560f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.0 +1.5.0