Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
7f7dcff2
Commit
7f7dcff2
authored
Mar 25, 2017
by
Andreas Marek
Browse files
Cleanup of generic simple kernel
parent
c9172d13
Changes
4
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
src/compute_hh_trafo.X90
View file @
7f7dcff2
...
...
@@ -401,11 +401,15 @@
#ifdef WITH_OPENMP
#ifdef USE_ASSUMED_SIZE
call double_hh_trafo_generic_simple_&
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_generic_simple_&
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
...
...
@@ -414,11 +418,15 @@
#else /* WITH_OPENMP */
#ifdef USE_ASSUMED_SIZE
call double_hh_trafo_generic_simple_&
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_generic_simple_&
call double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off-1+nbw,istripe), w, nbw, nl, stripe_width, nbw)
...
...
src/elpa2_kernels/elpa2_kernels_complex_simple.F90
View file @
7f7dcff2
...
...
@@ -68,22 +68,21 @@ module complex_generic_simple_kernel
contains
#define COMPLEXCASE 1
#define DOUBLE_PRECISION_COMPLEX 1
#define DATATYPE ck8
#define DOUBLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_COMPLEX
#undef DATATYPE
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1
#undef DOUBLE_PRECISION_COMPLEX
#define DATATYPE ck4
#define SINGLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_COMPLEX
#undef DATATYPE
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif
end module complex_generic_simple_kernel
...
...
src/elpa2_kernels/elpa2_kernels_real_simple.F90
View file @
7f7dcff2
...
...
@@ -62,31 +62,29 @@
module real_generic_simple_kernel
private
public double_hh_trafo_generic_simple_double
public double_hh_trafo_real_generic_simple_double
#ifdef WANT_SINGLE_PRECISION_REAL
public double_hh_trafo_generic_simple_single
public double_hh_trafo_real_generic_simple_single
#endif
contains
#endif
#define REALCASE 1
#define DOUBLE_PRECISION_REAL 1
#define DATATYPE rk8
#define DOUBLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_REAL
#undef DATATYPE
#undef REALCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#undef DOUBLE_PRECISION_REAL
#define DATATYPE rk4
#define SINGLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa2_kernels_simple_template.X90"
#undef DOUBLE_PRECISION_REAL
#undef DATATYPE
#undef REALCASE
#undef SINGLE_PRECISION
#endif
#ifndef USE_ASSUMED_SIZE
...
...
src/elpa2_kernels/elpa2_kernels_simple_template.X90
View file @
7f7dcff2
...
...
@@ -61,41 +61,39 @@
#if COMPLEXCASE==1
! the intel compiler creates a temp copy of array q
! this should be avoided without using assumed size arrays
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine single_hh_trafo_complex_generic_simple_double(q, hh, nb, nq, ldq)
#else
subroutine single_hh_trafo_complex_generic_simple_single(q, hh, nb, nq, ldq)
#endif
subroutine single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (q, hh, nb, nq, ldq)
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq
#ifdef USE_ASSUMED_SIZE
complex(kind=DATATYPE), intent(inout) :: q(ldq,*)
complex(kind=DATATYPE), intent(in) :: hh(*)
complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
complex(kind=C_DATATYPE_KIND), intent(in) :: hh(*)
#else
complex(kind=DATATYPE), intent(inout) :: q(1:ldq,1:nb)
complex(kind=DATATYPE), intent(in) :: hh(1:nb)
complex(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb)
complex(kind=C_DATATYPE_KIND), intent(in) :: hh(1:nb)
#endif
integer(kind=ik) :: i
complex(kind=DATATYPE) :: h1, tau1, x(nq)
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=C_DATATYPE_KIND) :: h1, tau1, x(nq)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple_double")
#endif
#else /* DOUBLE_PRECISION_COMPLEX */
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple_single")
#endif
#endif /* DOUBLE_PRECISION_COMPLEX */
call timer%start("kernel_&
&MATH_DATATYPE&
&_generic_simple: single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple" // &
&PRECISION_SUFFIX &
)
! Just one Householder transformation
...
...
@@ -113,52 +111,46 @@
do i=2,nb
q(1:nq,i) = q(1:nq,i) + x(1:nq)*hh(i)
enddo
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple_double")
#endif
#else /* DOUBLE_PRECISION_COMPLEX */
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple_single")
#endif
call timer%stop("kernel_&
&MATH_DATATYPE&
&_generic_simple: single_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple" // &
&PRECISION_SUFFIX &
)
#endif /* DOUBLE_PRECISION_COMPLEX */
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine single_hh_trafo_complex_generic_simple_double
#else
end subroutine single_hh_trafo_complex_generic_simple_single
#endif
end subroutine
#endif /* COMPLEXCASE == 1 */
! --------------------------------------------------------------------------------------------------
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
subroutine double_hh_trafo_generic_simple_double(q, hh, nb, nq, ldq, ldh)
#else
subroutine double_hh_trafo_generic_simple_single(q, hh, nb, nq, ldq, ldh)
#endif
subroutine double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (q, hh, nb, nq, ldq, ldh)
#endif /* REALCASE == 1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine double_hh_trafo_complex_generic_simple_double(q, hh, nb, nq, ldq, ldh)
#else
subroutine double_hh_trafo_complex_generic_simple_single(q, hh, nb, nq, ldq, ldh)
#endif
subroutine double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple_&
&PRECISION&
& (q, hh, nb, nq, ldq, ldh)
#endif /* COMPLEXCASE==1 */
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
implicit none
...
...
@@ -166,63 +158,35 @@
#if REALCASE==1
#ifdef USE_ASSUMED_SIZE
real(kind=DATATYPE), intent(inout) :: q(ldq,*)
real(kind=DATATYPE), intent(in) :: hh(ldh,*)
real(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
real(kind=C_DATATYPE_KIND), intent(in) :: hh(ldh,*)
#else
real(kind=DATATYPE), intent(inout) :: q(1:ldq,1:nb+1)
real(kind=DATATYPE), intent(in) :: hh(1:ldh,1:6)
real(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+1)
real(kind=C_DATATYPE_KIND), intent(in) :: hh(1:ldh,1:6)
#endif
real(kind=DATATYPE) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
real(kind=C_DATATYPE_KIND) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef USE_ASSUMED_SIZE
complex(kind=DATATYPE), intent(inout) :: q(ldq,*)
complex(kind=DATATYPE), intent(in) :: hh(ldh,*)
complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
complex(kind=C_DATATYPE_KIND), intent(in) :: hh(ldh,*)
#else
complex(kind=DATATYPE), intent(inout) :: q(1:ldq,1:nb+1)
complex(kind=DATATYPE), intent(in) :: hh(1:ldh,1:2)
complex(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+1)
complex(kind=C_DATATYPE_KIND), intent(in) :: hh(1:ldh,1:2)
#endif
complex(kind=DATATYPE) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
complex(kind=C_DATATYPE_KIND) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
#endif /* COMPLEXCASE==1 */
integer(kind=ik) :: i
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_double")
#endif
#else
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel real generic simple: double_hh_trafo_real_generic_simple_single")
#endif
#endif
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: double_hh_trafo_complex_generic_simple_double")
#endif
#else
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: double_hh_trafo_complex_generic_simple_single")
#endif
#endif
#endif /* COMPLEXCASE==1 */
call timer%start("kernel_&
&MATH_DATATYPE&
&_generic_simple: double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple" // &
&PRECISION_SUFFIX &
)
! Calculate dot product of the two Householder vectors
#if REALCASE==1
...
...
@@ -291,58 +255,13 @@
q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1)
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel real generic simple: double_hh_trafo_complex_generic_simple_double")
#endif
#else /* DOUBLE_PRECISION_REAL */
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel real generic simple: double_hh_trafo_complex_generic_simple_single")
#endif
#endif /* DOUBLE_PRECISION_REAL */
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: double_hh_trafo_complex_generic_simple_double")
#endif
#else /* DOUBLE_PRECISION_COMPLEX */
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: double_hh_trafo_complex_generic_simple_single")
#endif
#endif /* DOUBLE_PRECISION_COMPLEX */
#endif /* COMPLEXCASE==1 */
#if REALCASE==1
call timer%stop("kernel_&
&MATH_DATATYPE&
&_generic_simple: double_hh_trafo_&
&MATH_DATATYPE&
&_generic_simple" // &
&PRECISION_SUFFIX &
)
#ifdef DOUBLE_PRECISION_REAL
end subroutine double_hh_trafo_generic_simple_double
#else
end subroutine double_hh_trafo_generic_simple_single
#endif
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine double_hh_trafo_complex_generic_simple_double
#else
end subroutine double_hh_trafo_complex_generic_simple_single
#endif
#endif /* COMPLEXCASE==1 */
end subroutine
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment