Commit cb4c4ae7 authored by Andreas Marek

Remove assumed size from generic real kernel

The generic real kernel is now contained in a module, which allows
strict interface checking. It also no longer uses assumed-size arrays.
Both changes make it easier to debug and find errors.

However, this change might be performance critical! It is possible to
switch back to the old implementation if that turns out to
be beneficial w.r.t. performance. Timings with gfortran 4.9 on Intel
Haswell showed that the new implementation is about 30 percent faster
than the previous one.
parent 33a94bfc
......@@ -39,7 +39,7 @@ if HAVE_DETAILED_TIMINGS
endif
if WITH_REAL_GENERIC_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.f90
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.F90
endif
if WITH_COMPLEX_GENERIC_KERNEL
......@@ -308,6 +308,9 @@ elpa2.i: $(top_srcdir)/src/elpa2.F90
elpa1.i: $(top_srcdir)/src/elpa1.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/elpa1.F90 -o $@
elpa2_kernels_real.i: $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90 -o $@
mod_compute_hh_trafo_real.i: $(top_srcdir)/src/mod_compute_hh_trafo_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/mod_compute_hh_trafo_real.F90 -o $@
......
......@@ -666,6 +666,11 @@ DX_MAN_FEATURE(ON)
DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
DESPERATELY_WANT_ASSUMED_SIZE=0
if test x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([DESPERATELY_WANT_ASSUMED_SIZE],[1],[use assumed size arrays, even if not debuggable])
fi
AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
AC_SUBST([with_amd_bulldozer_kernel])
......
......@@ -32,9 +32,9 @@ module compute_hh_trafo_real
use real_generic_simple_kernel, only : double_hh_trafo_generic_simple
#endif
!#if defined(WITH_REAL_GENERIC_KERNEL)
! use real_generic_kernel, only : double_hh_trafo_generic
!#endif
#if defined(WITH_REAL_GENERIC_KERNEL) && !(defined(DESPERATELY_WANT_ASSUMED_SIZE))
use real_generic_kernel, only : double_hh_trafo_generic
#endif
#if defined(WITH_REAL_BGP_KERNEL)
use real_bgp_kernel, only : double_hh_trafo_bgp
......@@ -124,12 +124,28 @@ module compute_hh_trafo_real
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call double_hh_trafo_generic(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1, &
istripe,my_thread), w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
#endif
#else /* WITH_OPENMP */
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
call double_hh_trafo_generic(a(1,j+off+a_off-1,istripe),w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_generic(a(1,j+off+a_off-1,istripe), w, &
call double_hh_trafo_generic(a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), &
nbw, nl, stripe_width, nbw)
#endif
#endif /* WITH_OPENMP */
enddo
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment