Single precision support for ELPA2

ELPA2 can now be built (as ELPA1) for single precision calculations.
The ELPA2 kernels which are implemented in assembler, C, or C++ have NOT
yet been ported.

Thus, at the moment, only the GENERIC and GENERIC_SIMPLE kernels support
single precision calculations.
parent de6a4fde
......@@ -669,7 +669,6 @@ module ELPA1_compute
h1(nc+1:nc+n) = tmat(1:n,n+1)
nc = nc+n
enddo
#ifdef DOUBLE_PRECISION_REAL
if (nc>0) call mpi_allreduce( h1, h2, nc, MPI_REAL8, MPI_SUM, mpi_comm_rows, mpierr)
#else
......@@ -2485,9 +2484,13 @@ module ELPA1_compute
#endif
integer(kind=ik), parameter :: max_strip=128
real(kind=rk) :: beta, sig, s, c, t, tau, rho, eps, tol, dlamch, &
dlapy2, qtrans(2,2), dmax, zmax, d1new, d2new
#ifdef DOUBLE_PRECISION_REAL
real(kind=rk) :: dlamch, dlapy2
#else
real(kind=rk) :: slamch, slapy2
#endif
real(kind=rk) :: beta, sig, s, c, t, tau, rho, eps, tol, &
qtrans(2,2), dmax, zmax, d1new, d2new
real(kind=rk) :: z(na), d1(na), d2(na), z1(na), delta(na), &
dbase(na), ddiff(na), ev_scale(na), tmp(na)
real(kind=rk) :: d1u(na), zu(na), d1l(na), zl(na)
......@@ -2633,7 +2636,11 @@ module ELPA1_compute
zmax = maxval(abs(z))
dmax = maxval(abs(d))
#ifdef DOUBLE_PRECISION_REAL
EPS = DLAMCH( 'Epsilon' )
#else
EPS = SLAMCH( 'Epsilon' )
#endif
TOL = 8.*EPS*MAX(dmax,zmax)
! If the rank-1 modifier is small enough, no more needs to be done
......@@ -2692,8 +2699,11 @@ module ELPA1_compute
! Find sqrt(a**2+b**2) without overflow or
! destructive underflow.
#ifdef DOUBLE_PRECISION_REAL
TAU = DLAPY2( C, S )
#else
TAU = SLAPY2( C, S )
#endif
T = D1(na1) - D(idx(i))
C = C / TAU
S = -S / TAU
......
......@@ -146,6 +146,7 @@ contains
use precision
use cuda_functions
use mod_check_for_gpu
use iso_c_binding
implicit none
logical, intent(in), optional :: useQR
logical :: useQRActual, useQREnvironment
......@@ -163,7 +164,7 @@ contains
integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: nbw, num_blocks
real(kind=rk), allocatable :: tmat(:,:,:), e(:)
real(kind=rk) :: ttt0, ttt1, ttts
real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double
integer(kind=ik) :: i
logical :: success
logical, save :: firstCall = .true.
......@@ -294,6 +295,7 @@ contains
ttts = ttt0
call bandred_real(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
tmat, wantDebug, useGPU, success, useQRActual)
if (.not.(success)) return
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
......@@ -308,14 +310,19 @@ contains
endif
ttt0 = MPI_Wtime()
call tridiag_band_real(na, nbw, nblk, a, lda, ev, e, matrixCols, hh_trans_real, &
call tridiag_band_real(na, nbw, nblk, a, lda, ev, e, matrixCols, hh_trans_real, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_all)
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time tridiag_band_real :',ttt1-ttt0
#ifdef DOUBLE_PRECISION_REAL
call mpi_bcast(ev,na,MPI_REAL8,0,mpi_comm_all,mpierr)
call mpi_bcast(e,na,MPI_REAL8,0,mpi_comm_all,mpierr)
#else
call mpi_bcast(ev,na,MPI_REAL4,0,mpi_comm_all,mpierr)
call mpi_bcast(e,na,MPI_REAL4,0,mpi_comm_all,mpierr)
#endif
ttt1 = MPI_Wtime()
time_evp_fwd = ttt1-ttts
......@@ -426,6 +433,7 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
use precision
use cuda_functions
use mod_check_for_gpu
use iso_c_binding
implicit none
integer(kind=ik), intent(in), optional :: THIS_COMPLEX_ELPA_KERNEL_API
integer(kind=ik) :: THIS_COMPLEX_ELPA_KERNEL
......@@ -440,7 +448,7 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
integer(kind=ik) :: l_cols, l_rows, l_cols_nev, nbw, num_blocks
complex(kind=ck), allocatable :: tmat(:,:,:)
real(kind=rk), allocatable :: q_real(:,:), e(:)
real(kind=rk) :: ttt0, ttt1, ttts
real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double
integer(kind=ik) :: i
logical :: success, wantDebug
......@@ -562,9 +570,13 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time tridiag_band_complex :',ttt1-ttt0
call mpi_bcast(ev,na,MPI_REAL8,0,mpi_comm_all,mpierr)
call mpi_bcast(e,na,MPI_REAL8,0,mpi_comm_all,mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_bcast(ev, na, mpi_real8, 0, mpi_comm_all, mpierr)
call mpi_bcast(e, na, mpi_real8, 0, mpi_comm_all, mpierr)
#else
call mpi_bcast(ev, na, mpi_real4, 0, mpi_comm_all, mpierr)
call mpi_bcast(e, na, mpi_real4, 0, mpi_comm_all, mpierr)
#endif
ttt1 = MPI_Wtime()
time_evp_fwd = ttt1-ttts
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -34,8 +34,9 @@ module compute_hh_trafo_complex
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use iso_c_binding
implicit none
real(kind=rk), intent(inout) :: kernel_time
real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double
integer(kind=lik) :: kernel_flops
integer(kind=ik), intent(in) :: nbw, max_blk_size
complex(kind=ck) :: bcast_buffer(nbw,max_blk_size)
......@@ -57,7 +58,7 @@ module compute_hh_trafo_complex
#ifdef WITH_OPENMP
integer(kind=ik) :: my_thread, noff
#endif
real(kind=rk) :: ttt
real(kind=c_double) :: ttt ! MPI_WTIME always needs double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
......
......@@ -52,7 +52,7 @@ module compute_hh_trafo_real
#endif
implicit none
include "mpif.h"
real(kind=rk), intent(inout) :: kernel_time
real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double
integer(kind=lik) :: kernel_flops
integer(kind=ik), intent(in) :: nbw, max_blk_size
real(kind=rk) :: bcast_buffer(nbw,max_blk_size)
......@@ -82,7 +82,8 @@ module compute_hh_trafo_real
integer(kind=ik) :: my_thread, noff
#endif
integer(kind=ik) :: j, nl, jj, jjj
real(kind=rk) :: w(nbw,6), ttt
real(kind=rk) :: w(nbw,6)
real(kind=c_double) :: ttt ! MPI_WTIME always needs double
if (THIS_REAL_ELPA_KERNEL .eq. REAL_ELPA_KERNEL_GPU) then
! ncols - indicates the number of HH reflectors to apply; at least 1 must be available
......
......@@ -12,8 +12,17 @@ module cuda_functions
integer(kind=ik) :: cudaHostRegisterMapped
integer(kind=ik) :: cudaMemcpyDeviceToDevice
integer(kind=c_size_t), parameter :: size_of_real_datatype = 8_8
integer(kind=c_size_t), parameter :: size_of_complex_datatype = 16_8
#ifdef DOUBLE_PRECISION_REAL
integer(kind=c_size_t), parameter :: size_of_real_datatype = 8_rk
#else
integer(kind=c_size_t), parameter :: size_of_real_datatype = 4_rk
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
integer(kind=c_size_t), parameter :: size_of_complex_datatype = 16_ck
#else
integer(kind=c_size_t), parameter :: size_of_complex_datatype = 8_ck
#endif
! functions to set and query the CUDA devices
......@@ -193,6 +202,20 @@ module cuda_functions
end subroutine cublas_dgemm_c
end interface
! Explicit interface to the legacy cuBLAS C function cublasSgemm:
! single-precision GEMM, C := alpha*op(A)*op(B) + beta*C on device memory.
interface
subroutine cublas_sgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) bind(C,name='cublasSgemm')
use iso_c_binding
implicit none
character(1,C_CHAR),value :: cta, ctb                  ! op flags for A and B: 'N', 'T' or 'C'
integer(kind=C_INT),value :: m,n,k                     ! result is m x n; k is the contraction length
integer(kind=C_INT), intent(in), value :: lda,ldb,ldc  ! leading dimensions of A, B, C
real(kind=C_FLOAT),value :: alpha,beta                 ! scalar factors (C float)
integer(kind=C_intptr_T), value :: a, b, c             ! device pointers passed as integer addresses
end subroutine cublas_sgemm_c
end interface
interface
subroutine cublas_dtrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasDtrmm')
......@@ -207,6 +230,20 @@ module cuda_functions
end subroutine cublas_dtrmm_c
end interface
! Explicit interface to the legacy cuBLAS C function cublasStrmm:
! single-precision triangular matrix-matrix multiply, B := alpha*op(A)*B
! or B := alpha*B*op(A), on device memory.
interface
subroutine cublas_strmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasStrmm')
use iso_c_binding
implicit none
character(1,C_CHAR),value :: side, uplo, trans, diag   ! 'L'/'R', 'U'/'L', op flag, unit-diagonal flag
integer(kind=C_INT),value :: m,n                       ! dimensions of B
integer(kind=C_INT), intent(in), value :: lda,ldb      ! leading dimensions of A and B
real(kind=C_FLOAT), value :: alpha                     ! scalar factor (C float)
integer(kind=C_intptr_T), value :: a, b                ! device pointers passed as integer addresses
end subroutine cublas_strmm_c
end interface
interface
subroutine cublas_zgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasZgemm')
......@@ -222,6 +259,21 @@ module cuda_functions
end subroutine cublas_zgemm_c
end interface
! Explicit interface to the legacy cuBLAS C function cublasCgemm:
! single-precision complex GEMM, C := alpha*op(A)*op(B) + beta*C.
interface
subroutine cublas_cgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) bind(C,name='cublasCgemm')
use iso_c_binding
implicit none
character(1,C_CHAR),value :: cta, ctb                  ! op flags for A and B: 'N', 'T' or 'C'
integer(kind=C_INT),value :: m,n,k                     ! result is m x n; k is the contraction length
integer(kind=C_INT), intent(in), value :: lda,ldb,ldc  ! leading dimensions of A, B, C
! C_FLOAT_COMPLEX is the iso_c_binding kind matching C's "float complex"
! (cuComplex). C_FLOAT is a *real* kind and only matched by coincidence.
complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta
integer(kind=C_intptr_T), value :: a, b, c             ! device pointers passed as integer addresses
end subroutine cublas_cgemm_c
end interface
interface
subroutine cublas_ztrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasZtrmm')
......@@ -237,6 +289,22 @@ module cuda_functions
end subroutine cublas_ztrmm_c
end interface
! Explicit interface to the legacy cuBLAS C function cublasCtrmm:
! single-precision complex triangular matrix-matrix multiply,
! B := alpha*op(A)*B or B := alpha*B*op(A), on device memory.
interface
subroutine cublas_ctrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) bind(C,name='cublasCtrmm')
use iso_c_binding
implicit none
character(1,C_CHAR),value :: side, uplo, trans, diag   ! 'L'/'R', 'U'/'L', op flag, unit-diagonal flag
integer(kind=C_INT),value :: m,n                       ! dimensions of B
integer(kind=C_INT), intent(in), value :: lda,ldb      ! leading dimensions of A and B
! C_FLOAT_COMPLEX is the iso_c_binding kind matching C's "float complex"
! (cuComplex). C_FLOAT is a *real* kind and only matched by coincidence.
complex(kind=C_FLOAT_COMPLEX), value :: alpha
integer(kind=C_intptr_T), value :: a, b                ! device pointers passed as integer addresses
end subroutine cublas_ctrmm_c
end interface
contains
! functions to set and query the CUDA devices
......@@ -448,6 +516,20 @@ module cuda_functions
#endif
end subroutine cublas_dgemm
! Fortran wrapper for cublasSgemm (single-precision GEMM on the GPU,
! C := alpha*op(A)*op(B) + beta*C). a, b, c are device addresses held in
! C_intptr_T integers. Compiles to a no-op when built without
! WITH_GPU_VERSION, so callers need no #ifdef guards of their own.
subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
use iso_c_binding
implicit none
character(1,C_CHAR) :: cta, ctb
integer(kind=C_INT) :: m,n,k
integer(kind=C_INT), intent(in) :: lda,ldb,ldc
real(kind=C_FLOAT) :: alpha,beta
integer(kind=C_intptr_T) :: a, b, c
#ifdef WITH_GPU_VERSION
call cublas_sgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
#endif
end subroutine cublas_sgemm
subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
use iso_c_binding
......@@ -463,6 +545,21 @@ module cuda_functions
#endif
end subroutine cublas_dtrmm
! Fortran wrapper for cublasStrmm (single-precision triangular multiply on
! the GPU, B := alpha*op(A)*B or B := alpha*B*op(A)). a and b are device
! addresses held in C_intptr_T integers. Compiles to a no-op when built
! without WITH_GPU_VERSION, so callers need no #ifdef guards of their own.
subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
use iso_c_binding
implicit none
character(1,C_CHAR) :: side, uplo, trans, diag
integer(kind=C_INT) :: m,n
integer(kind=C_INT), intent(in) :: lda,ldb
real(kind=C_FLOAT) :: alpha
integer(kind=C_intptr_T) :: a, b
#ifdef WITH_GPU_VERSION
call cublas_strmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
#endif
end subroutine cublas_strmm
subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
use iso_c_binding
......@@ -478,6 +575,21 @@ module cuda_functions
#endif
end subroutine cublas_zgemm
! Fortran wrapper for cublasCgemm (single-precision complex GEMM on the
! GPU, C := alpha*op(A)*op(B) + beta*C). a, b, c are device addresses held
! in C_intptr_T integers. Compiles to a no-op when built without
! WITH_GPU_VERSION, so callers need no #ifdef guards of their own.
subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
use iso_c_binding
implicit none
character(1,C_CHAR) :: cta, ctb
integer(kind=C_INT) :: m,n,k
integer(kind=C_INT), intent(in) :: lda,ldb,ldc
! C_FLOAT_COMPLEX is the iso_c_binding kind for C's "float complex"
! (cuComplex); C_FLOAT is a real kind and only matched by coincidence.
complex(kind=C_FLOAT_COMPLEX) :: alpha,beta
integer(kind=C_intptr_T) :: a, b, c
#ifdef WITH_GPU_VERSION
call cublas_cgemm_c(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
#endif
end subroutine cublas_cgemm
subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
use iso_c_binding
......@@ -493,6 +605,20 @@ module cuda_functions
#endif
end subroutine cublas_ztrmm
! Fortran wrapper for cublasCtrmm (single-precision complex triangular
! multiply on the GPU, B := alpha*op(A)*B or B := alpha*B*op(A)). a and b
! are device addresses held in C_intptr_T integers. Compiles to a no-op
! when built without WITH_GPU_VERSION, so callers need no #ifdef guards.
subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
use iso_c_binding
implicit none
character(1,C_CHAR) :: side, uplo, trans, diag
integer(kind=C_INT) :: m,n
integer(kind=C_INT), intent(in) :: lda,ldb
! C_FLOAT_COMPLEX is the iso_c_binding kind for C's "float complex"
! (cuComplex); C_FLOAT is a real kind and only matched by coincidence.
complex(kind=C_FLOAT_COMPLEX) :: alpha
integer(kind=C_intptr_T) :: a, b
#ifdef WITH_GPU_VERSION
call cublas_ctrmm_c(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
#endif
end subroutine cublas_ctrmm
end module cuda_functions
......@@ -73,11 +73,15 @@ int main(int argc, char** argv) {
int na_rows, na_cols;
double startVal;
#ifdef DOUBLE_PRECISION_COMPLEX
complex double *a, *z, *as, *tmp1, *tmp2;
double *ev, *xr;
#else
complex *a, *z, *as, *tmp1, *tmp2;
float *ev, *xr;
#endif
int *iseed;
int success;
......@@ -105,6 +109,11 @@ int main(int argc, char** argv) {
printf("\n");
#ifdef DOUBLE_PRECISION_COMPLEX
printf(" Double precision version of ELPA2 is used. \n");
#else
printf(" Single precision version of ELPA2 is used. \n");
#endif
}
status = 0;
......@@ -162,7 +171,7 @@ int main(int argc, char** argv) {
printf("Allocating matrices with na_rows=%d and na_cols=%d\n",na_rows, na_cols);
printf("\n");
}
#ifdef DOUBLE_PRECISION_COMPLEX
a = malloc(na_rows*na_cols*sizeof(complex double));
z = malloc(na_rows*na_cols*sizeof(complex double));
as = malloc(na_rows*na_cols*sizeof(complex double));
......@@ -174,10 +183,25 @@ int main(int argc, char** argv) {
tmp1 = malloc(na_rows*na_cols*sizeof(complex double));
tmp2 = malloc(na_rows*na_cols*sizeof(complex double));
#else
a = malloc(na_rows*na_cols*sizeof(complex));
z = malloc(na_rows*na_cols*sizeof(complex));
as = malloc(na_rows*na_cols*sizeof(complex));
xr = malloc(na_rows*na_cols*sizeof(float));
iseed = malloc(4096*sizeof(int));
prepare_matrix_complex_from_fortran(na, myid, na_rows, na_cols, sc_desc, iseed, xr, a, z, as);
ev = malloc(na*sizeof(float));
tmp1 = malloc(na_rows*na_cols*sizeof(complex));
tmp2 = malloc(na_rows*na_cols*sizeof(complex));
#endif
iseed = malloc(4096*sizeof(int));
#ifdef DOUBLE_PRECISION_COMPLEX
prepare_matrix_complex_from_fortran_double_precision(na, myid, na_rows, na_cols, sc_desc, iseed, xr, a, z, as);
#else
prepare_matrix_complex_from_fortran_single_precision(na, myid, na_rows, na_cols, sc_desc, iseed, xr, a, z, as);
#endif
free(xr);
......@@ -189,7 +213,11 @@ int main(int argc, char** argv) {
mpierr = MPI_Barrier(MPI_COMM_WORLD);
THIS_COMPLEX_ELPA_KERNEL_API = ELPA2_COMPLEX_KERNEL_GENERIC;
success = elpa_solve_evp_complex_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API);
#ifdef DOUBLE_PRECISION_COMPLEX
success = elpa_solve_evp_complex_2stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API);
#else
success = elpa_solve_evp_complex_2stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API);
#endif
if (success != 1) {
printf("error in ELPA solve \n");
......@@ -204,8 +232,11 @@ int main(int argc, char** argv) {
}
/* check the results */
status = check_correctness_complex_from_fortran(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#ifdef DOUBLE_PRECISION_COMPLEX
status = check_correctness_complex_from_fortran_double_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#else
status = check_correctness_complex_from_fortran_single_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#endif
if (status !=0){
printf("The computed EVs are not correct !\n");
}
......
......@@ -72,9 +72,11 @@ int main(int argc, char** argv) {
int na_rows, na_cols;
double startVal;
#ifdef DOUBLE_PRECISION_REAL
double *a, *z, *as, *ev, *tmp1, *tmp2;
#else
float *a, *z, *as, *ev, *tmp1, *tmp2;
#endif
int *iseed;
int success;
......@@ -100,7 +102,11 @@ int main(int argc, char** argv) {
printf("as it's Fortran counterpart. It's only purpose is to show how \n");
printf("to evoke ELPA1 from a c programm\n");
printf("\n");
#ifdef DOUBLE_PRECISION_REAL
printf(" Double precision version of ELPA2 is used. \n");
#else
printf(" Single precision version of ELPA2 is used. \n");
#endif
}
status = 0;
......@@ -158,7 +164,7 @@ int main(int argc, char** argv) {
printf("Allocating matrices with na_rows=%d and na_cols=%d\n",na_rows, na_cols);
printf("\n");
}
#ifdef DOUBLE_PRECISION_REAL
a = malloc(na_rows*na_cols*sizeof(double));
z = malloc(na_rows*na_cols*sizeof(double));
as = malloc(na_rows*na_cols*sizeof(double));
......@@ -168,11 +174,23 @@ int main(int argc, char** argv) {
tmp1 = malloc(na_rows*na_cols*sizeof(double));
tmp2 = malloc(na_rows*na_cols*sizeof(double));
#else
a = malloc(na_rows*na_cols*sizeof(float));
z = malloc(na_rows*na_cols*sizeof(float));
as = malloc(na_rows*na_cols*sizeof(float));
iseed = malloc(4096*sizeof(int));
prepare_matrix_real_from_fortran(na, myid, na_rows, na_cols, sc_desc, iseed, a, z, as);
ev = malloc(na*sizeof(float));
tmp1 = malloc(na_rows*na_cols*sizeof(float));
tmp2 = malloc(na_rows*na_cols*sizeof(float));
#endif
iseed = malloc(4096*sizeof(int));
#ifdef DOUBLE_PRECISION_REAL
prepare_matrix_real_from_fortran_double_precision(na, myid, na_rows, na_cols, sc_desc, iseed, a, z, as);
#else
prepare_matrix_real_from_fortran_single_precision(na, myid, na_rows, na_cols, sc_desc, iseed, a, z, as);
#endif
if (myid == 0) {
printf("\n");
printf("Entering ELPA 2stage real solver\n");
......@@ -182,9 +200,11 @@ int main(int argc, char** argv) {
mpierr = MPI_Barrier(MPI_COMM_WORLD);
useQr = 0;
THIS_REAL_ELPA_KERNEL_API = ELPA2_REAL_KERNEL_GENERIC;
success = elpa_solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr);
#ifdef DOUBLE_PRECISION_REAL
success = elpa_solve_evp_real_2stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr);
#else
success = elpa_solve_evp_real_2stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr);
#endif
if (success != 1) {
printf("error in ELPA solve \n");
mpierr = MPI_Abort(MPI_COMM_WORLD, 99);
......@@ -198,7 +218,11 @@ int main(int argc, char** argv) {
}
/* check the results */
status = check_correctness_real_from_fortran(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#ifdef DOUBLE_PRECISION_REAL
status = check_correctness_real_from_fortran_double_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#else
status = check_correctness_real_from_fortran_single_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#endif
if (status !=0){
printf("The computed EVs are not correct !\n");
......
......@@ -157,6 +157,21 @@ program test_complex2
print *," "
endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA2 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA2 is used"
print *," "
endif
#endif
#ifdef HAVE_REDIRECT
if (check_redirect_environment_variable()) then
if (myid .eq. 0) then
......
......@@ -160,6 +160,20 @@ program test_complex2
endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA2 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA2 is used"
print *," "
endif
#endif
#ifdef HAVE_REDIRECT
if (check_redirect_environment_variable()) then
if (myid .eq. 0) then
......
......@@ -156,6 +156,21 @@ program test_complex2
print *," "
endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA2 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA2 is used"
print *," "
endif
#endif
#ifdef HAVE_REDIRECT
if (check_redirect_environment_variable()) then
if (myid .eq. 0) then
......
......@@ -152,6 +152,20 @@ program test_real2
endif
#endif
#ifdef DOUBLE_PRECISION_REAL
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA2 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA2 is used"
print *," "
endif
#endif
#ifdef HAVE_REDIRECT
if (check_redirect_environment_variable()) then
if (myid .eq. 0) then
......
......@@ -156,6 +156,21 @@ program test_real2
print *," "
endif
#endif
#ifdef DOUBLE_PRECISION_REAL
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA2 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA2 is used"
print *," "
endif