Commit 56af2503 by Andreas Marek

### Templatize

parent 9a82b0e4
 ... ... @@ -67,6 +67,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \ src/helpers/matrix_plot.F90 \ src/general/elpa_ssmv.F90 \ src/general/elpa_ssr2.F90 \ src/general/mod_elpa_skewsymmetric_blas.F90 \ src/elpa_index.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c ... ... @@ -123,6 +124,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \ src/elpa1/elpa_invert_trm.F90 \ src/elpa1/elpa_multiply_a_b.F90 \ src/elpa1/elpa_solve_tridi_impl_public.F90 \ src/general/elpa_ssr2_template.F90 \ src/general/elpa_ssmv_template.F90 \ src/general/precision_macros.h \ src/general/precision_typedefs.h \ src/general/precision_kinds.F90 ... ...
 ... ... @@ -448,10 +448,11 @@ function elpa_solve_evp_& ! Transform imaginary part ! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix. call trans_ev_& &MATH_DATATYPE& &_& &PRECISION& & (obj, na, nev, a, lda, tau, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev) &MATH_DATATYPE& &_& &PRECISION& & (obj, na, nev, a, lda, tau, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), & ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev) endif #ifdef HAVE_LIKWID ... ...
 ... ... @@ -50,6 +50,7 @@ #include "config-f90.h" #include "../general/sanity.F90" #undef ROUTINE_NAME #ifdef SKEW_SYMMETRIC #define ROUTINE_NAME elpa_transpose_vectors_ss_ #else ... ...
 ... ... @@ -52,6 +52,7 @@ #include "../general/sanity.F90" #undef ROUTINE_NAME #ifdef SKEW_SYMMETRIC #define ROUTINE_NAME ssymm_matrix_allreduce #else ... ...
 ... ... @@ -189,7 +189,7 @@ print *,"Problem getting option. Aborting..." stop endif call obj%get("mpi_comm_cols",mpi_comm_cols,error call obj%get("mpi_comm_cols",mpi_comm_cols,error) if (error .ne. ELPA_OK) then print *,"Problem getting option. Aborting..." stop ... ...
 ... ... @@ -93,7 +93,7 @@ use omp_lib #endif use elpa_blas_interfaces use elpa_skewsymmetric_blas implicit none #include "../general/precision_kinds.F90" class(elpa_abstract_impl_t), intent(inout) :: obj ... ... @@ -633,7 +633,7 @@ if (isSkewsymmetric) then ! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1) hd(:) = 0.0_rk call elpa_dssmv(nc, tau, ab(1,ns), 2*nb-1, hv, hd) call ELPA_PRECISION_SSMV(nc, tau, ab(1,ns), 2*nb-1, hv, hd) else call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1) endif ... ... @@ -657,7 +657,7 @@ #if REALCASE == 1 if (isSkewsymmetric) then ! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1) call elpa_dssr2( nc, hd, hv, ab(1,ns), 2*nb-1) call ELPA_PRECISION_SSR2( nc, hd, hv, ab(1,ns), 2*nb-1) else call PRECISION_SYR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1) endif ... ... @@ -894,7 +894,7 @@ if (isSkewsymmetric) then ! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1) hd(:) = 0.0_rk call elpa_dssmv( nc, tau, ab(1,ns), 2*nb-1, hv, hd) call ELPA_PRECISION_SSMV( nc, tau, ab(1,ns), 2*nb-1, hv, hd) else call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1) endif ... ... @@ -935,7 +935,7 @@ if (isSkewsymmetric) then ! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1) hd(:) = 0.0_rk call elpa_dssmv(nc, tau, ab(1,ns), 2*nb-1, hv, hd) call ELPA_PRECISION_SSMV(nc, tau, ab(1,ns), 2*nb-1, hv, hd) else call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1) endif ... ... @@ -1060,7 +1060,7 @@ #if REALCASE == 1 if (isSkewsymmetric) then ! if (nc>1) call PRECISION_SSR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1) if (nc>1) call elpa_dssr2(nc-1, hd(2), hv(2), ab(1,ns+1), 2*nb-1) if (nc>1) call ELPA_PRECISION_SSR2(nc-1, hd(2), hv(2), ab(1,ns+1), 2*nb-1) else if (nc>1) call PRECISION_SYR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1) endif ... ... @@ -1076,7 +1076,7 @@ #if REALCASE == 1 if (isSkewsymmetric) then ! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1) call elpa_dssr2(nc,hd, hv, ab(1,ns), 2*nb-1) call ELPA_PRECISION_SSR2(nc,hd, hv, ab(1,ns), 2*nb-1) else call PRECISION_SYR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1) endif ... ...
 subroutine elpa_dssmv( n, alpha, a, lda, x, y ) ! implicit none ! ! .. scalar arguments .. integer n, lda double precision alpha ! .. ! .. array arguments .. double precision a( lda, * ), x( * ), y( * ) ! .. ! ! .. parameters .. double precision zero, one parameter ( zero = 0.0d+0, one = 1.0d+0 ) integer nb parameter ( nb = 64 ) ! .. ! .. local scalars .. integer ii, jj, ic, iy, jc, jx, info double precision temp ! .. local arrays .. double precision work( nb ) ! .. ! .. external subroutines .. external dgemv, dtrmv, dcopy, daxpy ! .. ! .. intrinsic functions .. intrinsic max, min ! .. ! .. executable statements .. ! Test the input parameters. info = 0 if (n .eq. 0) then return end if if ( n .lt. 0 ) then info = 1 else if ( lda .lt. max( 1,n ) ) then info = 4 end if if ( info .ne. 0 ) then print *,"wrong arguments in elpa_ssmv, info =", info return end if ! Access only lower triangular part of a temp = zero do jj = 1, n, nb jc = min( nb, n-jj+1 ) jx = 1 + (jj-1) do ii = 1, n, nb ic = min( nb, n-ii+1 ) iy = 1 + (ii-1) ! gemv for non-diagonal blocks. use 2x dtrmv for diagonal blocks if ( ii .lt. jj ) then call dgemv( 't', jc, nb, -alpha, a( jj, ii ), lda, & & x( jx ), 1, temp, y( iy ), 1 ) else if ( ii .gt. jj ) then call dgemv( 'n', ic, nb, alpha, a( ii, jj ), lda, & & x( jx ), 1, temp, y( iy ), 1 ) else if (temp .eq. zero) then y(1:n) = zero else if (temp .ne. one) then ! should not happen call dscal( jc, temp, y( iy ), 1) end if call dcopy( jc, x( jx ), 1, work, 1 ) call dtrmv( 'l', 'n', 'n', jc, a( jj, jj ), lda, work, 1 ) call daxpy(jc,alpha,work,1,y( iy ),1) call dcopy( jc, x( jx ), 1, work, 1 ) call dtrmv( 'l', 't', 'n', jc, a( jj, jj ), lda, work, 1 ) call daxpy(jc,-alpha,work,1,y( iy ),1) end if end do temp = one end do ! return ! ! end of elpa_dssmv. ! end \ No newline at end of file
 #if REALCASE == 1 #ifdef DOUBLE_PRECISION subroutine elpa_dssmv(n, alpha, a, lda, x, y) #endif #ifdef SINGLE_PRECISION subroutine elpa_sssmv(n, alpha, a, lda, x, y) #endif #endif /* REALCASE */ #if COMPLEXCASE == 1 #ifdef DOUBLE_PRECISION subroutine elpa_zssmv(n, alpha, a, lda, x, y) #endif #ifdef SINGLE_PRECISION subroutine elpa_cssmv(n, alpha, a, lda, x, y) #endif #endif /* COMPLEXCASE */ use precision use elpa_utilities, only : error_unit !use elpa_blas_interfaces implicit none #include "./precision_kinds.F90" integer(kind=ik) :: n, lda MATH_DATATYPE(kind=rck) :: alpha MATH_DATATYPE(kind=rck) :: a( lda, * ), x( * ), y( * ) integer(kind=ik), parameter :: nb = 64 integer(kind=ik) :: ii, jj, ic, iy, jc, jx, info MATH_DATATYPE(kind=rck) :: temp MATH_DATATYPE(kind=rck) :: work( nb ) ! Test the input parameters. info = 0 if (n /= 0) then return end if if ( n < 0 ) then info = 1 else if ( lda < max( 1,n ) ) then info = 4 end if if ( info /= 0 ) then write(error_unit,*) "wrong arguments in elpa_ssmv, info =", info return end if ! Access only lower triangular part of a temp = zero do jj = 1, n, nb jc = min( nb, n-jj+1 ) jx = 1 + (jj-1) do ii = 1, n, nb ic = min( nb, n-ii+1 ) iy = 1 + (ii-1) ! gemv for non-diagonal blocks. use 2x dtrmv for diagonal blocks if ( ii < jj ) then call PRECISION_GEMV('t', jc, nb, -alpha, a( jj, ii ), lda, & x( jx ), 1, temp, y( iy ), 1 ) else if ( ii > jj ) then call PRECISION_GEMV('n', ic, nb, alpha, a( ii, jj ), lda, & x( jx ), 1, temp, y( iy ), 1 ) else if (temp == zero) then y(1:n) = zero else if (temp /= one) then ! should not happen call PRECISION_SCAL( jc, temp, y( iy ), 1) end if call PRECISION_COPY( jc, x( jx ), 1, work, 1 ) call PRECISION_TRMV( 'l', 'n', 'n', jc, a( jj, jj ), lda, work, 1 ) call PRECISION_AXPY(jc,alpha,work,1,y( iy ),1) call PRECISION_COPY( jc, x( jx ), 1, work, 1 ) call PRECISION_TRMV( 'l', 't', 'n', jc, a( jj, jj ), lda, work, 1 ) call PRECISION_AXPY(jc,-alpha,work,1,y( iy ),1) end if end do temp = one end do return end subroutine
 subroutine elpa_dssr2(n, x, y, a, lda ) ! implicit none ! ! .. scalar arguments .. integer n, lda ! .. ! .. array arguments .. double precision a( lda, * ), x( * ), y( * ) ! .. ! ! .. parameters .. double precision zero, one, temp1, temp2 parameter ( zero = 0.0d+0, one = 1.0d+0 ) integer nb parameter ( nb = 64 ) ! ! .. local scalars .. integer i, j, ii, jj, ic, ix, iy, jc, jx, jy, info logical upper ! .. external subroutines .. external dger ! .. ! .. intrinsic functions .. intrinsic max, min ! .. ! .. executable statements .. ! ! test the input parameters. info = 0 if (n .eq. 0) then return end if if ( n .lt. 0 ) then info = 1 else if ( lda .lt. max( 1,n ) ) then info = 5 end if if ( info .ne. 0 ) then print *,"wrong arguments in elpa_ssmv, info =", info return end if ! ! Access A in lower triangular part. ! do jj = 1, n, nb jc = min( nb, n-jj+1 ) jx = 1 + (jj-1) jy = 1 + (jj-1) do j = 1, jc-1 ! Do local update for blocks on the diagonal if ( ( x( jx + j -1) .ne. zero ) .or. & & ( y( jy + j -1 ) .ne. zero ) ) then temp1 = - y( jy + j - 1 ) temp2 = - x( jy + j - 1 ) do i = j+1, jc a( jj + i -1 , jj + j -1 ) = a(jj + i -1,jj + j -1 ) + x( jx + i -1)*temp1 - y(jj + i -1 )*temp2 end do end if end do ! Use dger for other blocks do ii = jj+nb, n, nb ic = min( nb, n-ii+1 ) ix = 1 + (ii-1) iy = 1 + (ii-1) call dger( ic, nb, -one, x( ix ), 1, y( jy ), 1, & & a( ii, jj ), lda ) call dger( ic, nb, one, y( iy ), 1, x( jx ), 1, & & a( ii, jj ), lda ) end do end do return ! ! end of elpa_dssr2. ! end ! ! \ No newline at end of file
 #if REALCASE == 1 #ifdef DOUBLE_PRECISION subroutine elpa_dssr2(n, x, y, a, lda ) #endif #ifdef SINGLE_PRECISION subroutine elpa_sssr2(n, x, y, a, lda ) #endif #endif /* REALCASE */ #if COMPLEXCASE == 1 #ifdef DOUBLE_PRECISION subroutine elpa_zssr2(n, x, y, a, lda ) #endif #ifdef SINGLE_PRECISION subroutine elpa_cssr2(n, x, y, a, lda ) #endif #endif /* COMPLEXCASE */ use precision use elpa_utilities, only : error_unit !use elpa_blas_interfaces implicit none #include "./precision_kinds.F90" integer(kind=ik) :: n, lda MATH_DATATYPE(kind=rck) :: a( lda, * ), x( * ), y( * ) integer(kind=ik), parameter :: nb = 64 MATH_DATATYPE(kind=rck) :: temp1, temp2 integer(kind=ik) :: i, j, ii, jj, ic, ix, iy, jc, jx, jy, info logical :: upper ! test the input parameters. info = 0 if (n == 0) then return end if if ( n < 0 ) then info = 1 else if ( lda < max( 1,n ) ) then info = 5 end if if ( info /= 0 ) then write(error_unit,*) "wrong arguments in elpa_ssmv, info =", info return end if ! Access A in lower triangular part. do jj = 1, n, nb jc = min( nb, n-jj+1 ) jx = 1 + (jj-1) jy = 1 + (jj-1) do j = 1, jc-1 ! Do local update for blocks on the diagonal if ( ( x( jx + j -1) /= zero ) .or. & ( y( jy + j -1 ) /= zero ) ) then temp1 = - y( jy + j - 1 ) temp2 = - x( jy + j - 1 ) do i = j+1, jc a( jj + i -1 , jj + j -1 ) = a(jj + i -1,jj + j -1 ) + x( jx + i -1)*temp1 - y(jj + i -1 )*temp2 end do end if end do ! Use dger for other blocks do ii = jj+nb, n, nb ic = min( nb, n-ii+1 ) ix = 1 + (ii-1) iy = 1 + (ii-1) #if REALCASE == 1 call PRECISION_GER(ic, nb, -one, x( ix ), 1, y( jy ), 1, & a( ii, jj ), lda ) call PRECISION_GER(ic, nb, one, y( iy ), 1, x( jx ), 1, & a( ii, jj ), lda ) #endif end do end do return end subroutine
 ! This file is part of ELPA. ! ! The ELPA library was originally created by the ELPA consortium, ! consisting of the following organizations: ! ! - Max Planck Computing and Data Facility (MPCDF), formerly known as ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ! Informatik, ! - Technische Universität München, Lehrstuhl für Informatik mit ! Schwerpunkt Wissenschaftliches Rechnen , ! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, ! and ! - IBM Deutschland GmbH ! ! This particular source code file contains additions, changes and ! enhancements authored by Intel Corporation which is not part of ! the ELPA consortium. ! ! More information can be found here: ! http://elpa.mpcdf.mpg.de/ ! ! ELPA is free software: you can redistribute it and/or modify ! it under the terms of the version 3 of the license of the ! GNU Lesser General Public License as published by the Free ! Software Foundation. ! ! ELPA is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU Lesser General Public License for more details. ! ! You should have received a copy of the GNU Lesser General Public License ! along with ELPA. If not, see ! ! ELPA reflects a substantial effort on the part of the original ! ELPA consortium, and we ask you to respect the spirit of the ! license that we chose: i.e., please contribute any changes you ! may have back to the original ELPA library distribution, and keep ! any derivatives of ELPA under the same license that we chose for ! the original distribution, the GNU Lesser General Public License. ! ! ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines ! ! Copyright of the original code rests with the authors inside the ELPA ! consortium. The copyright of any additional modifications shall rest ! with their original authors, but shall adhere to the licensing terms ! distributed along with the original code in the file "COPYING". ! ! Author: Andreas Marek, MPCDF #include "config-f90.h" module elpa_skewsymmetric_blas use precision use iso_c_binding contains #define REALCASE 1 #define DOUBLE_PRECISION 1 #include "./precision_macros.h" #include "./elpa_ssr2_template.F90" #include "./elpa_ssmv_template.F90" #undef REALCASE #undef DOUBLE_PRECISION #if defined(WANT_SINGLE_PRECISION_REAL) #define REALCASE 1 #define SINGLE_PRECISION 1 #include "./precision_macros.h" #include "./elpa_ssr2_template.F90" #include "./elpa_ssmv_template.F90" #undef REALCASE #undef SINGLE_PRECISION #endif /* WANT_SINGLE_PRECISION_REAL */ #define COMPLEXCASE 1 #define DOUBLE_PRECISION 1 #include "./precision_macros.h" #include "./elpa_ssr2_template.F90" #include "./elpa_ssmv_template.F90" #undef COMPLEXCASE #undef DOUBLE_PRECISION #if defined(WANT_SINGLE_PRECISION_COMPLEX) #define COMPLEXCASE 1 #define SINGLE_PRECISION 1 #include "./precision_macros.h" #include "./elpa_ssr2_template.F90" #include "./elpa_ssmv_template.F90" #undef COMPLEXCASE #undef SINGLE_PRECISION #endif /* WANT_SINGLE_PRECISION_COMPLEX */ end module
 ... ... @@ -41,6 +41,10 @@ #undef PRECISION_LAED5 #undef PRECISION_NRM2 #undef PRECISION_LASET #undef PRECISION_SCAL #undef PRECISION_COPY #undef PRECISION_AXPY #undef PRECISION_GER #undef cublas_PRECISION_GEMM #undef cublas_PRECISION_TRMM #undef cublas_PRECISION_GEMV ... ... @@ -51,6 +55,9 @@ #undef PRECISION_SUFFIX #undef ELPA_IMPL_SUFFIX #undef ELPA_PRECISION_SSMV #undef ELPA_PRECISION_SSR2</