Commit 48378893 authored by Andreas Marek

For performance reasons, make the use of assumed-size arrays the default

This can be switched off with the configure option --disable-assumed-size
parent 0ccc9d8f
......@@ -983,17 +983,17 @@ DX_MAN_FEATURE(ON)
DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
USE_ASSUMED_SIZE=no
AC_MSG_CHECKING(whether --enable-assumed-size is specified)
USE_ASSUMED_SIZE=yes
AC_MSG_CHECKING(whether --disable-assumed-size is specified)
AC_ARG_ENABLE([assumed-size],
AS_HELP_STRING([--enable-assumed-size],
[use assumed-size Fortran arrays]),
[],
[USE_ASSUMED_SIZE=no])
AS_HELP_STRING([--disable-assumed-size],
[do NOT use assumed-size Fortran arrays]),
[USE_ASSUMED_SIZE=no],
[USE_ASSUMED_SIZE=yes])
AC_MSG_RESULT([${USE_ASSUMED_SIZE}])
AM_CONDITIONAL([WITH_USE_ASSUMED_SIZE],[test x"$USE_ASSUMED_SIZE" = x"yes"])
if test x"${USE_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([USE_ASSUMED_SIZE],[1],[use assumed size Fortran arrays, even if not debuggable])
AC_DEFINE([USE_ASSUMED_SIZE],[1],[for performance reasons use assumed size Fortran arrays, even if not debuggable])
fi
if test x"${want_single_precision}" = x"yes" ; then
......
......@@ -217,9 +217,12 @@
l_cols_tile = tile_size/np_cols ! local cols of a tile
if (useGPU) then
#if !defined(USE_ASSUMED_SIZE)
if (size(a,dim=1) .ne. lda .or. size(a,dim=2) .ne. na_cols) then
print *,"bandred_complex: sizes of a wrong ? ",lda,size(a,dim=1),na_cols,size(a,dim=2)
endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
successCUDA = cuda_memcpy(a_dev, loc(a(1,1)),(lda)*(na_cols)*size_of_double_complex_datatype,cudaMemcpyHostToDevice)
#else
......@@ -1046,10 +1049,12 @@
enddo ! istep
if (useGPU) then
#if !(defined(USE_ASSUMED_SIZE))
if (size(a,dim=1)*size(a,dim=2) .ne. lda*na_cols) then
print *,"bandred_complex: size a ",size(a,dim=1)*size(a,dim=2) , lda*na_cols
endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
successCUDA = cuda_memcpy ( loc(a(1,1)), a_dev, lda*na_cols*size_of_double_complex_datatype,cudaMemcpyDeviceToHost)
#else
......
......@@ -58,6 +58,7 @@
#include "config-f90.h"
#ifndef USE_ASSUMED_SIZE
module real_generic_simple_kernel
private
......@@ -68,6 +69,7 @@ module real_generic_simple_kernel
#endif
contains
#endif
#define REALCASE 1
#define DOUBLE_PRECISION_REAL 1
......@@ -87,5 +89,7 @@ module real_generic_simple_kernel
#undef REALCASE
#endif
#ifndef USE_ASSUMED_SIZE
end module real_generic_simple_kernel
#endif
! --------------------------------------------------------------------------------------------------
......@@ -213,7 +213,7 @@
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATPYE), intent(inout) :: q(ldq/2,*)
complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq/2,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else
real(kind=REAL_DATATYPE), intent(inout) :: q(:,:)
......@@ -222,7 +222,7 @@
real(kind=REAL_DATATYPE), intent(in) :: s
#ifdef PACK_REAL_TO_COMPLEX
complex(kind=COMPLEX_DATATPYE) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6
complex(kind=COMPLEX_DATATYPE) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6
#else
real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, &
y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12
......@@ -463,7 +463,7 @@
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATPYE), intent(inout) :: q(ldq/2,*)
complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq/2,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else
real(kind=REAL_DATATYPE), intent(inout) :: q(:,:)
......@@ -471,7 +471,7 @@
#endif
real(kind=REAL_DATATYPE), intent(in) :: s
#ifdef PACK_REAL_TO_COMPLEX
complex(kind=COMPLEX_DATATPYE) :: x1, x2, x3, x4, y1, y2, y3, y4
complex(kind=COMPLEX_DATATYPE) :: x1, x2, x3, x4, y1, y2, y3, y4
#else
real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, x5, x6, x7, x8, &
y1, y2, y3, y4, y5, y6, y7, y8
......@@ -654,7 +654,7 @@
integer(kind=ik), intent(in) :: nb, ldq, ldh
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATPYE), intent(inout) :: q(ldq/2,*)
complex(kind=COMPLEX_DATATYPE), intent(inout) :: q(ldq/2,*)
real(kind=REAL_DATATYPE), intent(in) :: hh(ldh,*)
#else
real(kind=REAL_DATATYPE), intent(inout) :: q(:,:) !q(1:ldq/2,1:nb+1)
......@@ -663,7 +663,7 @@
real(kind=REAL_DATATYPE), intent(in) :: s
#ifdef PACK_REAL_TO_COMPLEX
complex(kind=COMPLEX_DATATPYE) :: x1, x2, y1, y2
complex(kind=COMPLEX_DATATYPE) :: x1, x2, y1, y2
#else
real(kind=REAL_DATATYPE) :: x1, x2, x3, x4, y1, y2, y3, y4
#endif
......
......@@ -202,7 +202,7 @@
call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_single")
#endif
#endif
#endif
#endif /* REALCASE==1 */
......
......@@ -84,7 +84,7 @@ module compute_hh_trafo_real
use cuda_c_kernel
use cuda_functions
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL)
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use real_generic_simple_kernel !, only : double_hh_trafo_generic_simple
#endif
......@@ -668,7 +668,7 @@ module compute_hh_trafo_real
use cuda_c_kernel
use cuda_functions
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL)
#if defined(WITH_REAL_GENERIC_SIMPLE_KERNEL) && !(defined(USE_ASSUMED_SIZE))
use real_generic_simple_kernel !, only : double_hh_trafo_generic_simple
#endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment