Commit 48378893 authored by Andreas Marek

For performance reasons, make the use of assumed-size arrays the default

This can be switched off with the configure option --disable-assumed-size.
parent 0ccc9d8f
...@@ -983,17 +983,17 @@ DX_MAN_FEATURE(ON) ...@@ -983,17 +983,17 @@ DX_MAN_FEATURE(ON)
DX_HTML_FEATURE(ON) DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs]) DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
USE_ASSUMED_SIZE=no USE_ASSUMED_SIZE=yes
AC_MSG_CHECKING(whether --enable-assumed-size is specified) AC_MSG_CHECKING(whether --disable-assumed-size is specified)
AC_ARG_ENABLE([assumed-size], AC_ARG_ENABLE([assumed-size],
AS_HELP_STRING([--enable-assumed-size], AS_HELP_STRING([--disable-assumed-size],
[use assumed-size Fortran arrays]), [do NOT use assumed-size Fortran arrays]),
[], [USE_ASSUMED_SIZE=no],
[USE_ASSUMED_SIZE=no]) [USE_ASSUMED_SIZE=yes])
AC_MSG_RESULT([${USE_ASSUMED_SIZE}]) AC_MSG_RESULT([${USE_ASSUMED_SIZE}])
AM_CONDITIONAL([WITH_USE_ASSUMED_SIZE],[test x"$USE_ASSUMED_SIZE" = x"yes"]) AM_CONDITIONAL([WITH_USE_ASSUMED_SIZE],[test x"$USE_ASSUMED_SIZE" = x"yes"])
if test x"${USE_ASSUMED_SIZE}" = x"yes" ; then if test x"${USE_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([USE_ASSUMED_SIZE],[1],[use assumed size Fortran arrays, even if not debuggable]) AC_DEFINE([USE_ASSUMED_SIZE],[1],[for performance reasons use assumed size Fortran arrays, even if not debuggable])
fi fi
if test x"${want_single_precision}" = x"yes" ; then if test x"${want_single_precision}" = x"yes" ; then
......
...@@ -217,9 +217,12 @@ ...@@ -217,9 +217,12 @@
l_cols_tile = tile_size/np_cols ! local cols of a tile l_cols_tile = tile_size/np_cols ! local cols of a tile
if (useGPU) then if (useGPU) then
#if !defined(USE_ASSUMED_SIZE)
if (size(a,dim=1) .ne. lda .or. size(a,dim=2) .ne. na_cols) then if (size(a,dim=1) .ne. lda .or. size(a,dim=2) .ne. na_cols) then
print *,"bandred_complex: sizes of a wrong ? ",lda,size(a,dim=1),na_cols,size(a,dim=2) print *,"bandred_complex: sizes of a wrong ? ",lda,size(a,dim=1),na_cols,size(a,dim=2)
endif endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX #ifdef DOUBLE_PRECISION_COMPLEX
successCUDA = cuda_memcpy(a_dev, loc(a(1,1)),(lda)*(na_cols)*size_of_double_complex_datatype,cudaMemcpyHostToDevice) successCUDA = cuda_memcpy(a_dev, loc(a(1,1)),(lda)*(na_cols)*size_of_double_complex_datatype,cudaMemcpyHostToDevice)
#else #else
...@@ -1046,10 +1049,12 @@ ...@@ -1046,10 +1049,12 @@
enddo ! istep enddo ! istep
if (useGPU) then if (useGPU) then
#if !(defined(USE_ASSUMED_SIZE))
if (size(a,dim=1)*size(a,dim=2) .ne. lda*na_cols) then if (size(a,dim=1)*size(a,dim=2) .ne. lda*na_cols) then
print *,"bandred_complex: size a ",size(a,dim=1)*size(a,dim=2) , lda*na_cols print *,"bandred_complex: size a ",size(a,dim=1)*size(a,dim=2) , lda*na_cols
endif endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX #ifdef DOUBLE_PRECISION_COMPLEX
successCUDA = cuda_memcpy ( loc(a(1,1)), a_dev, lda*na_cols*size_of_double_complex_datatype,cudaMemcpyDeviceToHost) successCUDA = cuda_memcpy ( loc(a(1,1)), a_dev, lda*na_cols*size_of_double_complex_datatype,cudaMemcpyDeviceToHost)
#else #else
......
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include "config-f90.h" #include "config-f90.h"
#ifndef USE_ASSUMED_SIZE
module real_generic_simple_kernel module real_generic_simple_kernel
private private
...@@ -68,6 +69,7 @@ module real_generic_simple_kernel ...@@ -68,6 +69,7 @@ module real_generic_simple_kernel
#endif #endif
contains contains
#endif
#define REALCASE 1 #define REALCASE 1
#define DOUBLE_PRECISION_REAL 1 #define DOUBLE_PRECISION_REAL 1
...@@ -87,5 +89,7 @@ module real_generic_simple_kernel ...@@ -87,5 +89,7 @@ module real_generic_simple_kernel
#undef REALCASE #undef REALCASE
#endif #endif
#ifndef USE_ASSUMED_SIZE
end module real_generic_simple_kernel end module real_generic_simple_kernel
#endif
! -------------------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------------------
...@@ -213,7 +213,7 @@ ...@@ -213,7 +213,7 @@
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATPYE), intent(inout) :: q(ldq/2,*) complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq/2,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*) real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else #else
real(kind=REAL_DATATYPE), intent(inout) :: q(:,:) real(kind=REAL_DATATYPE), intent(inout) :: q(:,:)
...@@ -222,7 +222,7 @@ ...@@ -222,7 +222,7 @@
real(kind=REAL_DATATYPE), intent(in) :: s real(kind=REAL_DATATYPE), intent(in) :: s
#ifdef PACK_REAL_TO_COMPLEX #ifdef PACK_REAL_TO_COMPLEX
complex(kind=COMPLEX_DATATPYE) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6 complex(kind=COMPLEX_DATATYPE) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6
#else #else
real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, & real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, &
y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12 y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12
...@@ -463,7 +463,7 @@ ...@@ -463,7 +463,7 @@
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATPYE), intent(inout) :: q(ldq/2,*) complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq/2,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*) real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else #else
real(kind=REAL_DATATYPE), intent(inout) :: q(:,:) real(kind=REAL_DATATYPE), intent(inout) :: q(:,:)
...@@ -471,7 +471,7 @@ ...@@ -471,7 +471,7 @@
#endif #endif
real(kind=REAL_DATATYPE), intent(in) :: s real(kind=REAL_DATATYPE), intent(in) :: s
#ifdef PACK_REAL_TO_COMPLEX #ifdef PACK_REAL_TO_COMPLEX
complex(kind=COMPLEX_DATATPYE) :: x1, x2, x3, x4, y1, y2, y3, y4 complex(kind=COMPLEX_DATATYPE) :: x1, x2, x3, x4, y1, y2, y3, y4
#else #else
real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, x5, x6, x7, x8, & real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, x5, x6, x7, x8, &
y1, y2, y3, y4, y5, y6, y7, y8 y1, y2, y3, y4, y5, y6, y7, y8
...@@ -654,7 +654,7 @@ ...@@ -654,7 +654,7 @@
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATPYE), intent(inout) :: q(ldq/2,*) complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq/2,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*) real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else #else
real(kind=REAL_DATATYPE), intent(inout) :: q(:,:) !q(1:ldq/2,1:nb+1) real(kind=REAL_DATATYPE), intent(inout) :: q(:,:) !q(1:ldq/2,1:nb+1)
...@@ -663,7 +663,7 @@ ...@@ -663,7 +663,7 @@
real(kind=REAL_DATATYPE), intent(in) :: s real(kind=REAL_DATATYPE), intent(in) :: s
#ifdef PACK_REAL_TO_COMPLEX #ifdef PACK_REAL_TO_COMPLEX
complex(kind=COMPLEX_DATATPYE) :: x1, x2, y1, y2 complex(kind=COMPLEX_DATATYPE) :: x1, x2, y1, y2
#else #else
real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, y1, y2, y3, y4 real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, y1, y2, y3, y4
#endif #endif
......
...@@ -202,7 +202,7 @@ ...@@ -202,7 +202,7 @@
call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_single") call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_single")
#endif #endif
#endif #endif
#endif /* REALCASE==1 */ #endif /* REALCASE==1 */
......
...@@ -84,7 +84,7 @@ module compute_hh_trafo_real ...@@ -84,7 +84,7 @@ module compute_hh_trafo_real
use cuda_c_kernel use cuda_c_kernel
use cuda_functions use cuda_functions
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) #if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use real_generic_simple_kernel !, only : double_hh_trafo_generic_simple use real_generic_simple_kernel !, only : double_hh_trafo_generic_simple
#endif #endif
...@@ -668,7 +668,7 @@ module compute_hh_trafo_real ...@@ -668,7 +668,7 @@ module compute_hh_trafo_real
use cuda_c_kernel use cuda_c_kernel
use cuda_functions use cuda_functions
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) #if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use real_generic_simple_kernel !, only : double_hh_trafo_generic_simple use real_generic_simple_kernel !, only : double_hh_trafo_generic_simple
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment