Commit 56af2503 authored by Andreas Marek's avatar Andreas Marek
Browse files

Templatize

parent 9a82b0e4
......@@ -67,6 +67,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/helpers/matrix_plot.F90 \
src/general/elpa_ssmv.F90 \
src/general/elpa_ssr2.F90 \
src/general/mod_elpa_skewsymmetric_blas.F90 \
src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
......@@ -123,6 +124,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa1/elpa_invert_trm.F90 \
src/elpa1/elpa_multiply_a_b.F90 \
src/elpa1/elpa_solve_tridi_impl_public.F90 \
src/general/elpa_ssr2_template.F90 \
src/general/elpa_ssmv_template.F90 \
src/general/precision_macros.h \
src/general/precision_typedefs.h \
src/general/precision_kinds.F90
......
......@@ -448,10 +448,11 @@ function elpa_solve_evp_&
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
call trans_ev_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), &
ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
endif
#ifdef HAVE_LIKWID
......
......@@ -50,6 +50,7 @@
#include "config-f90.h"
#include "../general/sanity.F90"
#undef ROUTINE_NAME
#ifdef SKEW_SYMMETRIC
#define ROUTINE_NAME elpa_transpose_vectors_ss_
#else
......
......@@ -52,6 +52,7 @@
#include "../general/sanity.F90"
#undef ROUTINE_NAME
#ifdef SKEW_SYMMETRIC
#define ROUTINE_NAME ssymm_matrix_allreduce
#else
......
......@@ -189,7 +189,7 @@
print *,"Problem getting option. Aborting..."
stop
endif
call obj%get("mpi_comm_cols",mpi_comm_cols,error
call obj%get("mpi_comm_cols",mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
......
......@@ -93,7 +93,7 @@
use omp_lib
#endif
use elpa_blas_interfaces
use elpa_skewsymmetric_blas
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -633,7 +633,7 @@
if (isSkewsymmetric) then
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
hd(:) = 0.0_rk
call elpa_dssmv(nc, tau, ab(1,ns), 2*nb-1, hv, hd)
call ELPA_PRECISION_SSMV(nc, tau, ab(1,ns), 2*nb-1, hv, hd)
else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
endif
......@@ -657,7 +657,7 @@
#if REALCASE == 1
if (isSkewsymmetric) then
! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
call elpa_dssr2( nc, hd, hv, ab(1,ns), 2*nb-1)
call ELPA_PRECISION_SSR2( nc, hd, hv, ab(1,ns), 2*nb-1)
else
call PRECISION_SYR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
endif
......@@ -894,7 +894,7 @@
if (isSkewsymmetric) then
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
hd(:) = 0.0_rk
call elpa_dssmv( nc, tau, ab(1,ns), 2*nb-1, hv, hd)
call ELPA_PRECISION_SSMV( nc, tau, ab(1,ns), 2*nb-1, hv, hd)
else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
endif
......@@ -935,7 +935,7 @@
if (isSkewsymmetric) then
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
hd(:) = 0.0_rk
call elpa_dssmv(nc, tau, ab(1,ns), 2*nb-1, hv, hd)
call ELPA_PRECISION_SSMV(nc, tau, ab(1,ns), 2*nb-1, hv, hd)
else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
endif
......@@ -1060,7 +1060,7 @@
#if REALCASE == 1
if (isSkewsymmetric) then
! if (nc>1) call PRECISION_SSR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
if (nc>1) call elpa_dssr2(nc-1, hd(2), hv(2), ab(1,ns+1), 2*nb-1)
if (nc>1) call ELPA_PRECISION_SSR2(nc-1, hd(2), hv(2), ab(1,ns+1), 2*nb-1)
else
if (nc>1) call PRECISION_SYR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
endif
......@@ -1076,7 +1076,7 @@
#if REALCASE == 1
if (isSkewsymmetric) then
! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
call elpa_dssr2(nc,hd, hv, ab(1,ns), 2*nb-1)
call ELPA_PRECISION_SSR2(nc,hd, hv, ab(1,ns), 2*nb-1)
else
call PRECISION_SYR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
endif
......
subroutine elpa_dssmv( n, alpha, a, lda, x, y )
!
implicit none
!
! .. scalar arguments ..
integer n, lda
double precision alpha
! ..
! .. array arguments ..
double precision a( lda, * ), x( * ), y( * )
! ..
!
! .. parameters ..
double precision zero, one
parameter ( zero = 0.0d+0, one = 1.0d+0 )
integer nb
parameter ( nb = 64 )
! ..
! .. local scalars ..
integer ii, jj, ic, iy, jc, jx, info
double precision temp
! .. local arrays ..
double precision work( nb )
! ..
! .. external subroutines ..
external dgemv, dtrmv, dcopy, daxpy
! ..
! .. intrinsic functions ..
intrinsic max, min
! ..
! .. executable statements ..
! Test the input parameters.
info = 0
if (n .eq. 0) then
return
end if
if ( n .lt. 0 ) then
info = 1
else if ( lda .lt. max( 1,n ) ) then
info = 4
end if
if ( info .ne. 0 ) then
print *,"wrong arguments in elpa_ssmv, info =", info
return
end if
! Access only lower triangular part of a
temp = zero
do jj = 1, n, nb
jc = min( nb, n-jj+1 )
jx = 1 + (jj-1)
do ii = 1, n, nb
ic = min( nb, n-ii+1 )
iy = 1 + (ii-1)
! gemv for non-diagonal blocks. use 2x dtrmv for diagonal blocks
if ( ii .lt. jj ) then
call dgemv( 't', jc, nb, -alpha, a( jj, ii ), lda, &
& x( jx ), 1, temp, y( iy ), 1 )
else if ( ii .gt. jj ) then
call dgemv( 'n', ic, nb, alpha, a( ii, jj ), lda, &
& x( jx ), 1, temp, y( iy ), 1 )
else
if (temp .eq. zero) then
y(1:n) = zero
else if (temp .ne. one) then
! should not happen
call dscal( jc, temp, y( iy ), 1)
end if
call dcopy( jc, x( jx ), 1, work, 1 )
call dtrmv( 'l', 'n', 'n', jc, a( jj, jj ), lda, work, 1 )
call daxpy(jc,alpha,work,1,y( iy ),1)
call dcopy( jc, x( jx ), 1, work, 1 )
call dtrmv( 'l', 't', 'n', jc, a( jj, jj ), lda, work, 1 )
call daxpy(jc,-alpha,work,1,y( iy ),1)
end if
end do
temp = one
end do
!
return
!
! end of elpa_dssmv.
!
end
\ No newline at end of file
#if REALCASE == 1
#ifdef DOUBLE_PRECISION
subroutine elpa_dssmv(n, alpha, a, lda, x, y)
#endif
#ifdef SINGLE_PRECISION
subroutine elpa_sssmv(n, alpha, a, lda, x, y)
#endif
#endif /* REALCASE */
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION
subroutine elpa_zssmv(n, alpha, a, lda, x, y)
#endif
#ifdef SINGLE_PRECISION
subroutine elpa_cssmv(n, alpha, a, lda, x, y)
#endif
#endif /* COMPLEXCASE */
use precision
use elpa_utilities, only : error_unit
!use elpa_blas_interfaces
implicit none
#include "./precision_kinds.F90"
integer(kind=ik) :: n, lda
MATH_DATATYPE(kind=rck) :: alpha
MATH_DATATYPE(kind=rck) :: a( lda, * ), x( * ), y( * )
integer(kind=ik), parameter :: nb = 64
integer(kind=ik) :: ii, jj, ic, iy, jc, jx, info
MATH_DATATYPE(kind=rck) :: temp
MATH_DATATYPE(kind=rck) :: work( nb )
! Test the input parameters.
info = 0
if (n /= 0) then
return
end if
if ( n < 0 ) then
info = 1
else if ( lda < max( 1,n ) ) then
info = 4
end if
if ( info /= 0 ) then
write(error_unit,*) "wrong arguments in elpa_ssmv, info =", info
return
end if
! Access only lower triangular part of a
temp = zero
do jj = 1, n, nb
jc = min( nb, n-jj+1 )
jx = 1 + (jj-1)
do ii = 1, n, nb
ic = min( nb, n-ii+1 )
iy = 1 + (ii-1)
! gemv for non-diagonal blocks. use 2x dtrmv for diagonal blocks
if ( ii < jj ) then
call PRECISION_GEMV('t', jc, nb, -alpha, a( jj, ii ), lda, &
x( jx ), 1, temp, y( iy ), 1 )
else if ( ii > jj ) then
call PRECISION_GEMV('n', ic, nb, alpha, a( ii, jj ), lda, &
x( jx ), 1, temp, y( iy ), 1 )
else
if (temp == zero) then
y(1:n) = zero
else if (temp /= one) then
! should not happen
call PRECISION_SCAL( jc, temp, y( iy ), 1)
end if
call PRECISION_COPY( jc, x( jx ), 1, work, 1 )
call PRECISION_TRMV( 'l', 'n', 'n', jc, a( jj, jj ), lda, work, 1 )
call PRECISION_AXPY(jc,alpha,work,1,y( iy ),1)
call PRECISION_COPY( jc, x( jx ), 1, work, 1 )
call PRECISION_TRMV( 'l', 't', 'n', jc, a( jj, jj ), lda, work, 1 )
call PRECISION_AXPY(jc,-alpha,work,1,y( iy ),1)
end if
end do
temp = one
end do
return
end subroutine
subroutine elpa_dssr2(n, x, y, a, lda )
!
implicit none
!
! .. scalar arguments ..
integer n, lda
! ..
! .. array arguments ..
double precision a( lda, * ), x( * ), y( * )
! ..
!
! .. parameters ..
double precision zero, one, temp1, temp2
parameter ( zero = 0.0d+0, one = 1.0d+0 )
integer nb
parameter ( nb = 64 )
!
! .. local scalars ..
integer i, j, ii, jj, ic, ix, iy, jc, jx, jy, info
logical upper
! .. external subroutines ..
external dger
! ..
! .. intrinsic functions ..
intrinsic max, min
! ..
! .. executable statements ..
!
! test the input parameters.
info = 0
if (n .eq. 0) then
return
end if
if ( n .lt. 0 ) then
info = 1
else if ( lda .lt. max( 1,n ) ) then
info = 5
end if
if ( info .ne. 0 ) then
print *,"wrong arguments in elpa_ssmv, info =", info
return
end if
!
! Access A in lower triangular part.
!
do jj = 1, n, nb
jc = min( nb, n-jj+1 )
jx = 1 + (jj-1)
jy = 1 + (jj-1)
do j = 1, jc-1
! Do local update for blocks on the diagonal
if ( ( x( jx + j -1) .ne. zero ) .or. &
& ( y( jy + j -1 ) .ne. zero ) ) then
temp1 = - y( jy + j - 1 )
temp2 = - x( jy + j - 1 )
do i = j+1, jc
a( jj + i -1 , jj + j -1 ) = a(jj + i -1,jj + j -1 ) + x( jx + i -1)*temp1 - y(jj + i -1 )*temp2
end do
end if
end do
! Use dger for other blocks
do ii = jj+nb, n, nb
ic = min( nb, n-ii+1 )
ix = 1 + (ii-1)
iy = 1 + (ii-1)
call dger( ic, nb, -one, x( ix ), 1, y( jy ), 1, &
& a( ii, jj ), lda )
call dger( ic, nb, one, y( iy ), 1, x( jx ), 1, &
& a( ii, jj ), lda )
end do
end do
return
!
! end of elpa_dssr2.
!
end
!
!
\ No newline at end of file
#if REALCASE == 1
#ifdef DOUBLE_PRECISION
subroutine elpa_dssr2(n, x, y, a, lda )
#endif
#ifdef SINGLE_PRECISION
subroutine elpa_sssr2(n, x, y, a, lda )
#endif
#endif /* REALCASE */
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION
subroutine elpa_zssr2(n, x, y, a, lda )
#endif
#ifdef SINGLE_PRECISION
subroutine elpa_cssr2(n, x, y, a, lda )
#endif
#endif /* COMPLEXCASE */
use precision
use elpa_utilities, only : error_unit
!use elpa_blas_interfaces
implicit none
#include "./precision_kinds.F90"
integer(kind=ik) :: n, lda
MATH_DATATYPE(kind=rck) :: a( lda, * ), x( * ), y( * )
integer(kind=ik), parameter :: nb = 64
MATH_DATATYPE(kind=rck) :: temp1, temp2
integer(kind=ik) :: i, j, ii, jj, ic, ix, iy, jc, jx, jy, info
logical :: upper
! test the input parameters.
info = 0
if (n == 0) then
return
end if
if ( n < 0 ) then
info = 1
else if ( lda < max( 1,n ) ) then
info = 5
end if
if ( info /= 0 ) then
write(error_unit,*) "wrong arguments in elpa_ssmv, info =", info
return
end if
! Access A in lower triangular part.
do jj = 1, n, nb
jc = min( nb, n-jj+1 )
jx = 1 + (jj-1)
jy = 1 + (jj-1)
do j = 1, jc-1
! Do local update for blocks on the diagonal
if ( ( x( jx + j -1) /= zero ) .or. &
( y( jy + j -1 ) /= zero ) ) then
temp1 = - y( jy + j - 1 )
temp2 = - x( jy + j - 1 )
do i = j+1, jc
a( jj + i -1 , jj + j -1 ) = a(jj + i -1,jj + j -1 ) + x( jx + i -1)*temp1 - y(jj + i -1 )*temp2
end do
end if
end do
! Use dger for other blocks
do ii = jj+nb, n, nb
ic = min( nb, n-ii+1 )
ix = 1 + (ii-1)
iy = 1 + (ii-1)
#if REALCASE == 1
call PRECISION_GER(ic, nb, -one, x( ix ), 1, y( jy ), 1, &
a( ii, jj ), lda )
call PRECISION_GER(ic, nb, one, y( iy ), 1, x( jx ), 1, &
a( ii, jj ), lda )
#endif
end do
end do
return
end subroutine
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
! Author: Andreas Marek, MPCDF
#include "config-f90.h"
module elpa_skewsymmetric_blas
use precision
use iso_c_binding
contains
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "./precision_macros.h"
#include "./elpa_ssr2_template.F90"
#include "./elpa_ssmv_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#if defined(WANT_SINGLE_PRECISION_REAL)
#define REALCASE 1
#define SINGLE_PRECISION 1
#include "./precision_macros.h"
#include "./elpa_ssr2_template.F90"
#include "./elpa_ssmv_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "./precision_macros.h"
#include "./elpa_ssr2_template.F90"
#include "./elpa_ssmv_template.F90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#if defined(WANT_SINGLE_PRECISION_COMPLEX)
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "./precision_macros.h"
#include "./elpa_ssr2_template.F90"
#include "./elpa_ssmv_template.F90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_COMPLEX */
end module
......@@ -41,6 +41,10 @@
#undef PRECISION_LAED5
#undef PRECISION_NRM2
#undef PRECISION_LASET
#undef PRECISION_SCAL
#undef PRECISION_COPY
#undef PRECISION_AXPY
#undef PRECISION_GER
#undef cublas_PRECISION_GEMM
#undef cublas_PRECISION_TRMM
#undef cublas_PRECISION_GEMV
......@@ -51,6 +55,9 @@
#undef PRECISION_SUFFIX
#undef ELPA_IMPL_SUFFIX
#undef ELPA_PRECISION_SSMV
#undef ELPA_PRECISION_SSR2</