diff --git a/LICENSE b/LICENSE index 7a1ad19..6e21471 100644 --- a/LICENSE +++ b/LICENSE @@ -176,6 +176,7 @@ END OF TERMS AND CONDITIONS Copyright 2018- ECMWF + Copyright 2023- NVIDIA Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/field_RANKSUFF_data_module.fypp b/field_RANKSUFF_data_module.fypp index 05d129e..4f95107 100644 --- a/field_RANKSUFF_data_module.fypp +++ b/field_RANKSUFF_data_module.fypp @@ -1,5 +1,6 @@ #! (C) Copyright 2022- ECMWF. #! (C) Copyright 2022- Meteo-France. +#! (C) Copyright 2023- NVIDIA #! #! This software is licensed under the terms of the Apache Licence Version 2.0 #! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -16,11 +17,61 @@ ${fieldType.useParkind1 ()}$ IMPLICIT NONE +PRIVATE + +#:for ft in fieldTypeList +#:set ftn = ft.name + +PUBLIC :: ${ftn}$_COPY + +#:endfor + CONTAINS #:for ft in fieldTypeList #:set ftn = ft.name + SUBROUTINE ${ftn}$_COPY (HST, DEV, MAP_DEVPTR, KDIR, QUEUE) + + USE FIELD_ABORT_MODULE + + ${ft.type}$, POINTER :: HST (${ft.shape}$), DEV (${ft.shape}$) + LOGICAL, INTENT (IN) :: MAP_DEVPTR + INTEGER (KIND=JPIM), INTENT (IN) :: KDIR + INTEGER (KIND=JPIM), OPTIONAL, INTENT (IN) :: QUEUE + INTEGER :: LAST_CONTIG_DIM + INTEGER :: NEXT_CONTIG_DIM + + LAST_CONTIG_DIM = ${ftn}$_GET_LAST_CONTIGUOUS_DIMENSION (HST, 0) + NEXT_CONTIG_DIM = ${ftn}$_GET_LAST_CONTIGUOUS_DIMENSION (HST, LAST_CONTIG_DIM+1) + + SELECT CASE (LAST_CONTIG_DIM) +#:if defined('CUDA') +#:for d1 in range (ft.rank) + CASE (${d1}$) + SELECT CASE (NEXT_CONTIG_DIM) + #:for d2 in range (d1+1, ft.rank+1) + CASE (${d2}$) + CALL ${ftn}$_COPY_2D_DIM${d1}$_${d2}$_CONTIGUOUS (HST, DEV, MAP_DEVPTR, KDIR, QUEUE) + #:endfor + CASE DEFAULT + CALL FIELD_ABORT ('INTERNAL ERROR: UNEXPECTED NEXT_CONTIG_DIM') + END SELECT +#:endfor + CASE (${ft.rank}$) + CALL ${ftn}$_COPY_DIM${ft.rank}$_CONTIGUOUS (HST, DEV, MAP_DEVPTR, KDIR, QUEUE) +#:else +#:for d in range (ft.rank + 1) + CASE (${d}$) + CALL ${ftn}$_COPY_DIM${d}$_CONTIGUOUS (HST, DEV, MAP_DEVPTR, KDIR, QUEUE) +#:endfor +#:endif + CASE DEFAULT + CALL FIELD_ABORT ('INTERNAL ERROR: UNEXPECTED LAST_CONTIG_DIM') + END SELECT + + END SUBROUTINE + #:for d in range (0, ft.rank+1) SUBROUTINE ${ftn}$_COPY_DIM${d}$_CONTIGUOUS (HST, DEV, MAP_DEVPTR, KDIR, QUEUE) #ifdef _OPENACC @@ -85,22 +136,111 @@ CONTAINS #:endfor +#:if defined('CUDA') +#:for d1 in range (0, ft.rank) +#:for d2 in range (d1+1, ft.rank+1) + SUBROUTINE ${ftn}$_COPY_2D_DIM${d1}$_${d2}$_CONTIGUOUS (HST, DEV, MAP_DEVPTR, KDIR, QUEUE) + USE OPENACC + USE CUDAFOR + USE FIELD_ABORT_MODULE + + ${ft.type}$, POINTER :: HST (${ft.shape}$), DEV (${ft.shape}$) + INTEGER (KIND=JPIM), INTENT (IN) :: KDIR + LOGICAL, INTENT (IN) :: MAP_DEVPTR + INTEGER (KIND=JPIM), OPTIONAL, INTENT (IN) :: QUEUE + INTEGER (KIND=JPIM) :: IHST_PITCH, IDEV_PITCH, IRET + INTEGER (KIND=JPIM) :: IWIDTH, IHEIGHT, ISHP(${ft.rank+1}$) + #:if d2 < ft.rank + INTEGER :: ${', '.join (list (map (lambda i: 'J' + str (i+1), range (d2, ft.rank))))}$ + #:endif + INTEGER(KIND=CUDA_STREAM_KIND) :: STREAM + TYPE(C_PTR) :: HSTPTR + TYPE(C_DEVPTR) :: DEVPTR + + ISHP(1) = 1 + ISHP(2:) = SHAPE(HST) + IWIDTH = PRODUCT(ISHP(1:${d1+1}$)) * KIND(HST) + IHEIGHT = PRODUCT(ISHP(${d1+2}$:${d2+1}$)) + +#:set lb = lambda arr, i: f'LBOUND({arr}, {i+1})' +#:set lbnds = lambda arr, start, end: [lb(arr, i) for i in range(start, end)] +#:set this_slice = lambda arr: ', '.join(lbnds(arr, 0, ft.rank)) +#:set next_slice = lambda arr: ', '.join(lbnds(arr, 0, d1) + [lb(arr, d1)+'+1'] + lbnds(arr, d1+1, ft.rank)) + IHST_PITCH = LOC (HST(${next_slice('HST')}$)) - LOC (HST (${this_slice('HST')}$)) + IDEV_PITCH = LOC (DEV(${next_slice('DEV')}$)) - LOC (DEV (${this_slice('DEV')}$)) + + #:for e in range (ft.rank, d2, -1) + ${' ' * (ft.rank - e)}$DO J${e}$ = LBOUND (HST, ${e}$), UBOUND (HST, ${e}$) + #:endfor + #:set indent = ' ' * (ft.rank - d2 - 1) + #:set ar = lambda arr: ', '.join(lbnds(arr, 0, d2) + [f'J{i+1}' for i in range(d2, ft.rank)]) + ${indent}$ HSTPTR = C_LOC(HST (${ar('HST')}$)) + ${indent}$ IF (MAP_DEVPTR) THEN + ${indent}$ !$acc host_data use_device(DEV) + ${indent}$ DEVPTR = C_DEVLOC(DEV (${ar('DEV')}$)) + ${indent}$ !$acc end host_data + ${indent}$ ELSE + ${indent}$ !$acc data deviceptr(DEVPTR,DEV) + ${indent}$ DEVPTR = C_DEVLOC(DEV (${ar('DEV')}$)) + ${indent}$ !$acc end data + ${indent}$ ENDIF + ${indent}$ IF (KDIR == NH2D) THEN + ${indent}$ IF(PRESENT(QUEUE)) THEN + ${indent}$ CALL ACC_SET_CUDA_STREAM(QUEUE, STREAM) + ${indent}$ IRET = CUDAMEMCPY2DASYNC (DEVPTR, IDEV_PITCH, & + ${indent}$ & HSTPTR, IHST_PITCH, & + ${indent}$ & IWIDTH, IHEIGHT, & + ${indent}$ & STREAM=STREAM) + ${indent}$ ELSE + ${indent}$ IRET = CUDAMEMCPY2D (DEVPTR, IDEV_PITCH, & + ${indent}$ & HSTPTR, IHST_PITCH, & + ${indent}$ & IWIDTH, IHEIGHT) + ${indent}$ ENDIF + ${indent}$ IF (IRET /= CUDASUCCESS) THEN + ${indent}$ CALL FIELD_ABORT ("${ftn}$_COPY_2D_DIM${d1}$_${d2}$_CONTIGUOUS: HOST-TO-DEVICE TRANSFER FAILED") + ${indent}$ ENDIF + ${indent}$ ELSEIF (KDIR == ND2H) THEN + ${indent}$ IF(PRESENT(QUEUE)) THEN + ${indent}$ CALL ACC_SET_CUDA_STREAM(QUEUE, STREAM) + ${indent}$ IRET = CUDAMEMCPY2DASYNC (HSTPTR, IHST_PITCH, & + ${indent}$ & DEVPTR, IDEV_PITCH, & + ${indent}$ & IWIDTH, IHEIGHT, & + ${indent}$ & STREAM=STREAM) + ${indent}$ ELSE + ${indent}$ IRET = CUDAMEMCPY2D (HSTPTR, IHST_PITCH, & + ${indent}$ & DEVPTR, IDEV_PITCH, & + ${indent}$ & IWIDTH, IHEIGHT) + ${indent}$ ENDIF + ${indent}$ IF (IRET /= CUDASUCCESS) THEN + ${indent}$ CALL FIELD_ABORT ("${ftn}$_COPY_2D_DIM${d1}$_${d2}$_CONTIGUOUS: DEVICE-TO-HOST TRANSFER FAILED") + ${indent}$ ENDIF + ${indent}$ ENDIF + #:for e in range (d2, ft.rank) + ${' ' * (ft.rank - e - 1)}$ENDDO + #:endfor + END SUBROUTINE +#:endfor +#:endfor +#:endif + #:endfor #:for ft in fieldTypeList #:set ftn = ft.name - INTEGER (KIND=JPIM) FUNCTION ${ftn}$_GET_LAST_CONTIGUOUS_DIMENSION (PTR) RESULT (JDIM) + + INTEGER (KIND=JPIM) FUNCTION ${ftn}$_GET_LAST_CONTIGUOUS_DIMENSION (PTR, AFTER) RESULT (JDIM) ${ft.type}$, POINTER :: PTR (${ft.shape}$) - INTEGER*8 :: ISTRIDE (${ft.rank}$) + INTEGER (KIND=JPIM) :: AFTER + INTEGER*8 :: IPREVIOUS_STRIDE, ITHIS_STRIDE, ISIZE INTEGER (KIND=JPIM) :: J, LB(${ft.rank}$) + ! assume that dimension all dimensions before AFTER are contiguous... + LB = LBOUND(PTR) - ISTRIDE (1) = KIND (PTR) - DO J = 2, ${ft.rank}$ - ISTRIDE (J) = ISTRIDE (J-1) * SIZE (PTR, J-1) - ENDDO + IF (AFTER == 0) THEN + IPREVIOUS_STRIDE = KIND (PTR) + ENDIF - JDIM = 0 #:for d in range (1, ft.rank+1) #:set ind0 = "" #:set ind1 = "" @@ -110,14 +250,26 @@ CONTAINS #:endfor #:set ind0 = ind0[:-2] #:set ind1 = ind1[:-2] - IF (LOC (PTR (${ind1}$)) - LOC (PTR (${ind0}$)) /= ISTRIDE (${d}$)) THEN - RETURN + ITHIS_STRIDE = LOC (PTR (${ind1}$)) - LOC (PTR (${ind0}$)) + IF (AFTER < ${d}$) THEN + #:if d == 1 + ISIZE = 1 + #:else + ISIZE = SIZE(PTR, ${d-1}$) + #:endif + IF (SIZE(PTR, ${d}$) /= 1 .AND. IPREVIOUS_STRIDE * ISIZE /= ITHIS_STRIDE) THEN + JDIM = ${d-1}$ + RETURN + ENDIF + IPREVIOUS_STRIDE = IPREVIOUS_STRIDE * ISIZE + ELSE IF (AFTER == ${d}$) THEN + IPREVIOUS_STRIDE = ITHIS_STRIDE ENDIF - JDIM = ${d}$ - #:endfor + JDIM = ${ft.rank}$ END FUNCTION ${ftn}$_GET_LAST_CONTIGUOUS_DIMENSION + #:endfor END MODULE FIELD_${RANK}$${SUFF}$_DATA_MODULE diff --git a/field_RANKSUFF_module.fypp b/field_RANKSUFF_module.fypp index 997cef5..8785faf 100644 --- a/field_RANKSUFF_module.fypp +++ b/field_RANKSUFF_module.fypp @@ -1,5 +1,6 @@ #! (C) Copyright 2022- ECMWF. #! (C) Copyright 2022- Meteo-France. +#! (C) Copyright 2023- NVIDIA #! #! This software is licensed under the terms of the Apache Licence Version 2.0 #! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. @@ -221,8 +222,6 @@ CONTAINS ! By default we allocate thread-local temporaries SELF%THREAD_BUFFER = .TRUE. - SELF%LAST_CONTIGUOUS_DIMENSION = ${ft.rank}$ - IF (PRESENT(PERSISTENT)) THEN IF (PERSISTENT) THEN SELF%THREAD_BUFFER = .FALSE. @@ -378,15 +377,8 @@ CONTAINS INTEGER (KIND=JPIM), OPTIONAL, INTENT(IN) :: QUEUE REAL :: START, FINISH - SELF%LAST_CONTIGUOUS_DIMENSION = ${ftn}$_GET_LAST_CONTIGUOUS_DIMENSION (SELF%PTR) - CALL CPU_TIME(START) - SELECT CASE (SELF%LAST_CONTIGUOUS_DIMENSION) -#:for d in range (ft.rank + 1) - CASE (${d}$) - CALL ${ftn}$_COPY_DIM${d}$_CONTIGUOUS (SELF%PTR, SELF%DEVPTR, SELF%MAP_DEVPTR, KDIR, QUEUE) -#:endfor - END SELECT + CALL ${ftn}$_COPY (SELF%PTR, SELF%DEVPTR, SELF%MAP_DEVPTR, KDIR, QUEUE) CALL CPU_TIME(FINISH) IF (KDIR == NH2D) THEN CALL SELF%STATS%INC_CPU_TO_GPU_TRANSFER(START, FINISH) diff --git a/field_basic_module.F90 b/field_basic_module.F90 index 4589542..97de3cf 100644 --- a/field_basic_module.F90 +++ b/field_basic_module.F90 @@ -31,7 +31,6 @@ MODULE FIELD_BASIC_MODULE LOGICAL :: THREAD_BUFFER = .FALSE. INTEGER(KIND=JPIM) :: ISTATUS = 0 - INTEGER(KIND=JPIM) :: LAST_CONTIGUOUS_DIMENSION = 0 TYPE(GPU_STATS) :: STATS diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cd5290b..7512b94 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -59,6 +59,7 @@ list(APPEND TEST_FILES init_wrapper_gpu.F90 init_wrapper_lbounds.F90 init_wrapper_non_contiguous.F90 + init_wrapper_non_contiguous_multi.F90 no_transfer_get_device.F90 no_transfer_get_host.F90 pointer_to_owner_wrapper.F90 diff --git a/tests/init_wrapper_non_contiguous_multi.F90 b/tests/init_wrapper_non_contiguous_multi.F90 new file mode 100644 index 0000000..6c108a6 --- /dev/null +++ b/tests/init_wrapper_non_contiguous_multi.F90 @@ -0,0 +1,274 @@ +! (C) Copyright 2022- ECMWF. +! (C) Copyright 2022- Meteo-France. +! (C) Copyright 2023- NVIDIA +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. + +PROGRAM INIT_WRAPPER_NON_CONTIGUOUS_MULTI + ! TEST IF WRAPPER WORKS WITH NON CONTIGUOUS ARRAY + USE FIELD_MODULE + USE PARKIND1 + USE FIELD_FACTORY_MODULE + use iso_c_binding + + IMPLICIT NONE + REAL(KIND=JPRB), ALLOCATABLE, TARGET :: D1(:,:,:,:,:), D2(:,:,:,:,:) + CLASS(FIELD_2RB), POINTER :: W2 => NULL() + REAL(KIND=JPRB), POINTER :: W2PTR(:,:) + CLASS(FIELD_3RB), POINTER :: W3 => NULL() + REAL(KIND=JPRB), POINTER :: W3PTR(:,:,:) + CLASS(FIELD_4RB), POINTER :: W4 => NULL() + REAL(KIND=JPRB), POINTER :: W4PTR(:,:,:,:) + CLASS(FIELD_5RB), POINTER :: W5 => NULL() + REAL(KIND=JPRB), POINTER :: W5PTR(:,:,:,:,:) + integer(kind=8) :: ptr + + ALLOCATE(D1(7, 9, 11, 13, 15)) + ALLOCATE(D2(7, 9, 11, 13, 15)) + D1 = 0 + D2 = 0 + + PRINT *, "begin 1 (should call FIELD_4RB_COPY_2D_DIM1_4_CONTIGUOUS)" + CALL FIELD_NEW(W4, DATA=D1(1:1,:,:,:,3)) + CALL W4%GET_HOST_DATA_RDWR(W4PTR) + W4PTR=42 + CALL W4%GET_DEVICE_DATA_RDWR(W4PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W4PTR=92 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(1:1,:,:,:,3)=92 + CALL W4%GET_HOST_DATA_RDONLY(W4PTR) + CALL FIELD_DELETE(W4) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 1" + PRINT *, "" + + PRINT *, "begin 2 (should call FIELD_4RB_COPY_2D_DIM1_3_CONTIGUOUS)" + ! Should call DIM1_3 + CALL FIELD_NEW(W3, DATA=D1(:,2,:,:,3)) + CALL W3%GET_HOST_DATA_RDWR(W3PTR) + W3PTR=51 + CALL W3%GET_DEVICE_DATA_RDWR(W3PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W3PTR=61 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,2,:,:,3)=61 + CALL FIELD_DELETE(W3) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 2" + PRINT *, "" + + PRINT *, "begin 3 (should call FIELD_4RB_COPY_2D_DIM3_4_CONTIGUOUS)" + CALL FIELD_NEW(W4, DATA=D1(:,:,4:8,:,3)) + CALL W4%GET_DEVICE_DATA_RDWR(W4PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W4PTR=31 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,:,4:8,:,3)=31 + CALL FIELD_DELETE(W4) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 3" + PRINT *, "" + + PRINT *, "begin 4 (should call FIELD_3RB_COPY_2D_DIM1_2_CONTIGUOUS)" + CALL FIELD_NEW(W3, DATA=D1(:,2,4:8,3:5,3)) + CALL W3%GET_DEVICE_DATA_RDWR(W3PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W3PTR=91 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,2,4:8,3:5,3)=91 + CALL FIELD_DELETE(W3) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 4" + PRINT *, "" + + PRINT *, "begin 5 (should call FIELD_2RB_COPY_2D_DIM1_2_CONTIGUOUS)" + CALL FIELD_NEW(W2, DATA=D1(:,2,4:8,8,3)) + CALL W2%GET_DEVICE_DATA_RDWR(W2PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W2PTR=12.1 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,2,4:8,8,3)=12.1 + CALL FIELD_DELETE(W2) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 5" + PRINT *, "" + + PRINT *, "begin 6 (should call FIELD_4RB_COPY_2D_DIM2_4_CONTIGUOUS)" + CALL FIELD_NEW(W4, DATA=D1(:,:,4,:,:)) + CALL W4%GET_DEVICE_DATA_RDWR(W4PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W4PTR=22.1 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,:,4,:,:)=22.1 + CALL FIELD_DELETE(W4) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 6" + PRINT *, "" + + + PRINT *, "begin 7 (should call FIELD_5RB_COPY_DIM5_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(:,1:1,1:1,1:1,1:1)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=1.1 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,1:1,1:1,1:1,1:1)=1.1 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 7" + PRINT *, "" + + PRINT *, "begin 8 (should call FIELD_5RB_COPY_2D_DIM3_5_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(:3,1:1,3:3,:,2:4)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=1.2 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:3,1:1,3:3,:,2:4)=1.2 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 8" + PRINT *, "" + + PRINT *, "begin 9 (should call FIELD_5RB_COPY_2D_DIM3_5_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(:,1:1,3:3,:,2:4)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=2.5 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,1:1,3:3,:,2:4)=2.5 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 9" + PRINT *, "" + + PRINT *, "begin 10 (should call FIELD_5RB_COPY_2D_DIM2_4_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(:,1:1,:,1:5,2:4)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=9.1 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,1:1,:,1:5,2:4)=9.1 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 10" + PRINT *, "" + + PRINT *, "begin 11 (should call FIELD_5RB_COPY_2D_DIM2_4_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(:,1:1,:,8:12,:)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=8.1 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,1:1,:,8:12,:)=8.1 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 11" + PRINT *, "" + + PRINT *, "begin 12 (should call FIELD_5RB_COPY_2D_DIM1_5_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(3:7,:,:,:,3:3)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=8.4 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(3:7,:,:,:,3:3)=8.4 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 12" + PRINT *, "" + + PRINT *, "begin 13 (should call FIELD_5RB_COPY_2D_DIM1_5_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(3:3,:,:,:,:)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=12 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(3:3,:,:,:,:)=12 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 13" + PRINT *, "" + + PRINT *, "begin 14 (should call FIELD_5RB_COPY_2D_DIM1_2_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(1:4,1:9:2,:,3:12:3,:)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=18 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(1:4,1:9:2,:,3:12:3,:)=18 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 14" + PRINT *, "" + + PRINT *, "begin 15 (should call FIELD_5RB_COPY_2D_DIM1_2_CONTIGUOUS)" + CALL FIELD_NEW(W5, DATA=D1(:,1:9:2,:,3:12:3,:)) + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) + !$ACC KERNELS DEFAULT(PRESENT) + W5PTR=19 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,1:9:2,:,3:12:3,:)=19 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 15" + PRINT *, "" + + PRINT *, "begin 16 (should call FIELD_5RB_COPY_2D_DIM3_4_CONTIGUOUS)" +#ifdef _CUDA + CALL FIELD_NEW(W5, DATA=D1(:,:,:,3:12:3,:), MAP_DEVPTR=.FALSE.) +#else + CALL FIELD_NEW(W5, DATA=D1(:,:,:,3:12:3,:), MAP_DEVPTR=.TRUE.) +#endif + CALL W5%GET_DEVICE_DATA_RDWR(W5PTR) +#ifdef _CUDA + !$ACC KERNELS DEVICEPTR(W5PTR) +#else + !$ACC KERNELS PRESENT(W5PTR) +#endif + W5PTR=19 + !$ACC END KERNELS + D1 = -1 + D2 = -1 + D2(:,:,:,3:12:3,:)=19 + CALL FIELD_DELETE(W5) + IF (ANY(D1/=D2)) ERROR STOP + PRINT *, "end 16" + PRINT *, "" + +END PROGRAM INIT_WRAPPER_NON_CONTIGUOUS_MULTI