Commit e1fe8e52 authored by Andreas Marek's avatar Andreas Marek
Browse files

Retab

parent 0bc4e434
......@@ -796,7 +796,7 @@
&PRECISION&
&(obj, d1, dbase, ddiff, z, ev_scale_value, na1,i)
use precision
use elpa_abstract_impl
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: na1, i
......@@ -825,7 +825,7 @@
&PRECISION&
&(obj, idx_ev, nLength)
use precision
use elpa_abstract_impl
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: nLength
......@@ -908,7 +908,7 @@
&PRECISION&
&(obj, col1, col2)
use precision
use elpa_abstract_impl
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -1099,7 +1099,7 @@
! This is a test routine for checking if the eigenvalues are monotonically increasing.
! It is for debug purposes only, an error should never be triggered!
use precision
use elpa_abstract_impl
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
......
......@@ -126,15 +126,15 @@
allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
&MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
stop 1
endif
allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
stop 1
endif
......@@ -144,16 +144,16 @@
allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when allocating tmatr "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmatr "//errorMessage
stop 1
endif
allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when allocating tmatc "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmatc "//errorMessage
stop 1
endif
......@@ -183,10 +183,10 @@
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_cholesky_&
&MATH_DATATYPE&
&MATH_DATATYPE&
#if REALCASE == 1
&: Error in dpotrf: ",info
&: Error in dpotrf: ",info
#endif
#if COMPLEXCASE == 1
&: Error in zpotrf: ",info
......@@ -214,13 +214,13 @@
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_cholesky_&
&MATH_DATATYPE&
&MATH_DATATYPE&
#if REALCASE == 1
&: Error in dpotrf 2: ",info
&: Error in dpotrf 2: ",info
#endif
#if COMPLEXCASE == 1
&: Error in zpotrf 2: ",info
&: Error in zpotrf 2: ",info
#endif
success = .false.
......@@ -238,12 +238,12 @@
call MPI_Bcast(tmp1, nblk*(nblk+1)/2, &
#if REALCASE == 1
MPI_REAL_PRECISION, &
MPI_REAL_PRECISION, &
#endif
#if COMPLEXCASE == 1
MPI_COMPLEX_PRECISION, &
#endif
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
call obj%timer%stop("mpi_communication")
......@@ -258,12 +258,12 @@
if (l_cols-l_colx+1>0) &
#if REALCASE == 1
call PRECISION_TRSM('L', 'U', 'T', 'N', nblk, l_cols-l_colx+1, CONST_1_0, tmp2, ubound(tmp2,dim=1), &
a(l_row1,l_colx), lda)
a(l_row1,l_colx), lda)
#endif
#if COMPLEXCASE == 1
call PRECISION_TRSM('L', 'U', 'C', 'N', nblk, l_cols-l_colx+1, CONST_COMPLEX_PAIR_1_0, &
tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
#endif
call obj%timer%stop("blas")
......@@ -284,22 +284,22 @@
if (l_cols-l_colx+1>0) &
call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, &
#if REALCASE == 1
MPI_REAL_PRECISION, &
MPI_REAL_PRECISION, &
#endif
#if COMPLEXCASE == 1
MPI_COMPLEX_PRECISION, &
#endif
prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
enddo
! this has to be checked since it was changed substantially when doing type safe
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
n, na, nblk, nblk)
......@@ -314,12 +314,12 @@
#if REALCASE == 1
call PRECISION_GEMM('N', 'T', lre-lrs+1, lce-lcs+1, nblk, -CONST_1_0, &
tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
CONST_1_0, a(lrs,lcs), lda)
CONST_1_0, a(lrs,lcs), lda)
#endif
#if COMPLEXCASE == 1
call PRECISION_GEMM('N', 'C', lre-lrs+1, lce-lcs+1, nblk, -CONST_COMPLEX_PAIR_1_0, &
tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
CONST_COMPLEX_PAIR_1_0, a(lrs,lcs), lda)
#endif
call obj%timer%stop("blas")
......@@ -331,8 +331,8 @@
deallocate(tmp1, tmp2, tmatr, tmatc, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when deallocating tmp1 "//errorMessage
&MATH_DATATYPE&
&: error when deallocating tmp1 "//errorMessage
stop 1
endif
......
......@@ -125,16 +125,16 @@
allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_invert_trm_&
&MATH_DATATYPE&
&: error when allocating tmp1 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmp1 "//errorMessage
stop 1
endif
allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_invert_trm_&
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
stop 1
endif
......@@ -144,16 +144,16 @@
allocate(tmat1(l_rows,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_invert_trm_&
&MATH_DATATYPE&
&: error when allocating tmat1 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmat1 "//errorMessage
stop 1
endif
allocate(tmat2(nblk,l_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_invert_trm_&
&MATH_DATATYPE&
&: error when allocating tmat2 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmat2 "//errorMessage
stop 1
endif
......@@ -185,13 +185,13 @@
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_invert_trm_&
&MATH_DATATYPE&
&MATH_DATATYPE&
#if REALCASE == 1
&: Error in DTRTRI"
&: Error in DTRTRI"
#endif
#if COMPLEXCASE == 1
&: Error in ZTRTRI"
&: Error in ZTRTRI"
#endif
success = .false.
......@@ -229,7 +229,7 @@
call obj%timer%start("blas")
if (l_cols-l_colx+1>0) &
call PRECISION_TRMM ('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, &
call PRECISION_TRMM ('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, &
#if REALCASE == 1
CONST_1_0, &
#endif
......@@ -268,7 +268,7 @@
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
if (l_cols-l_col1+1>0) &
call MPI_Bcast(tmat2(1,l_col1), (l_cols-l_col1+1)*nblk, &
call MPI_Bcast(tmat2(1,l_col1), (l_cols-l_col1+1)*nblk, &
#if REALCASE == 1
MPI_REAL_PRECISION, &
#endif
......@@ -305,8 +305,8 @@
deallocate(tmp1, tmp2, tmat1, tmat2, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_invert_trm_&
&MATH_DATATYPE&
&: error when deallocating tmp1 "//errorMessage
&MATH_DATATYPE&
&: error when deallocating tmp1 "//errorMessage
stop 1
endif
......
......@@ -136,16 +136,16 @@
allocate(aux_mat(l_rows,nblk_mult), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_mult_at_b_&
&MATH_DATATYPE&
&: error when allocating aux_mat "//errorMessage
&MATH_DATATYPE&
&: error when allocating aux_mat "//errorMessage
stop 1
endif
allocate(aux_bc(l_rows*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_mult_at_b_&
&MATH_DATATYPE&
&: error when allocating aux_bc "//errorMessage
&MATH_DATATYPE&
&: error when allocating aux_bc "//errorMessage
stop 1
endif
......@@ -280,22 +280,22 @@
#if COMPLEXCASE == 1
call PRECISION_GEMM('C', 'N', &
#endif
nstor, lce-lcs+1, lre-lrs+1, &
nstor, lce-lcs+1, lre-lrs+1, &
#if REALCASE == 1
CONST_1_0, &
CONST_1_0, &
#endif
#if COMPLEXCASE == 1
CONST_COMPLEX_PAIR_1_0, &
#endif
aux_mat(lrs,1), ubound(aux_mat,dim=1), &
aux_mat(lrs,1), ubound(aux_mat,dim=1), &
b(lrs,lcs), ldb, &
#if REALCASE == 1
CONST_0_0, &
CONST_0_0, &
#endif
#if COMPLEXCASE == 1
CONST_COMPLEX_PAIR_0_0, &
#endif
tmp1, nstor)
tmp1, nstor)
call obj%timer%stop("blas")
else
tmp1 = 0
......@@ -307,12 +307,12 @@
call mpi_reduce(tmp1, tmp2, nstor*(lce-lcs+1), &
#if REALCASE == 1
MPI_REAL_PRECISION, &
MPI_REAL_PRECISION, &
#endif
#if COMPLEXCASE == 1
MPI_COMPLEX_PRECISION, &
MPI_COMPLEX_PRECISION, &
#endif
MPI_SUM, np, mpi_comm_rows, mpierr)
MPI_SUM, np, mpi_comm_rows, mpierr)
call obj%timer%stop("mpi_communication")
! Put the result into C
......
......@@ -58,10 +58,10 @@ function elpa_invert_trm_&
&PRECISION&
& (na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, wantDebug) &
result(success) bind(C,name="elpa_invert_trm_&
&MATH_DATATYPE&
&_&
&PRECISION&
")
&MATH_DATATYPE&
&_&
&PRECISION&
&")
use, intrinsic :: iso_c_binding
use elpa1_auxiliary, only : elpa_invert_trm_&
&MATH_DATATYPE&
......
......@@ -62,7 +62,7 @@ function elpa_mult_at_b_&
&MATH_DATATYPE&
&_&
&PRECISION&
") result(success)
&") result(success)
use, intrinsic :: iso_c_binding
use elpa1_auxiliary, only : elpa_mult_at_b_&
&MATH_DATATYPE&
......
This diff is collapsed.
This diff is collapsed.
......@@ -231,9 +231,9 @@
if (istep < na/nb2) then
! Transform first block column of remaining matrix
call obj%timer%start("blas")
call obj%timer%start("blas")
call PRECISION_GEQRF(n, nb2, ab(1+nb2,na_s-n_off), 2*nb-1, tau, work, lwork, info)
call obj%timer%stop("blas")
call obj%timer%stop("blas")
do i=1,nb2
hv(i,i) = CONST_1_0
......@@ -307,8 +307,8 @@
nr = MIN(na-nb-ns-n_off+1,nb) ! rows in subdiagonal block (may be < 0!!!)
! Note that nr>=0 implies that diagonal block is full (nc==nb)!
call wy_gen_&
&PRECISION&
&(obj,nc,nb2,w,hv,tau,work,nb)
&PRECISION&
&(obj,nc,nb2,w,hv,tau,work,nb)
if (iblk==nblocks .and. nc==nb) then
!request last nb2 columns
......@@ -329,11 +329,11 @@
if (nr>0) then
call wy_right_&
&PRECISION&
&(obj,nr,nb,nb2,ab(nb+1,ns),2*nb-1,w,hv,work,nb)
call obj%timer%start("blas")
&PRECISION&
&(obj,nr,nb,nb2,ab(nb+1,ns),2*nb-1,w,hv,work,nb)
call obj%timer%start("blas")
call PRECISION_GEQRF(nr, nb2, ab(nb+1,ns), 2*nb-1, tau_new, work, lwork, info)
call obj%timer%stop("blas")
call obj%timer%stop("blas")
do i=1,nb2
hv_new(i,i) = CONST_1_0
hv_new(i+1:,i) = ab(nb+2:2*nb-i+1,ns+i-1)
......@@ -365,8 +365,8 @@
endif
call wy_symm_&
&PRECISION&
&(obj,nc,nb2,ab(1,ns),2*nb-1,w,hv,work,work2,nb)
&PRECISION&
&(obj,nc,nb2,ab(1,ns),2*nb-1,w,hv,work,work2,nb)
if (my_pe>0 .and. iblk==1) then
!send first nb2 columns to previous PE
......@@ -392,11 +392,11 @@
if (nr>0) then
call wy_gen_&
&PRECISION&
&(obj,nr,nb2,w_new,hv_new,tau_new,work,nb)
&PRECISION&
&(obj,nr,nb2,w_new,hv_new,tau_new,work,nb)
call wy_left_&
&PRECISION&
&(obj,nb-nb2,nr,nb2,ab(nb+1-nb2,ns+nb2),2*nb-1,w_new,hv_new,work,nb)
&PRECISION&
&(obj,nb-nb2,nr,nb2,ab(nb+1-nb2,ns+nb2),2*nb-1,w_new,hv_new,work,nb)
endif
! Use new HH Vector for the next block
......
......@@ -285,10 +285,10 @@
allocate(tmat(nbw,nbw,num_blocks), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&" // ": error when allocating tmat "//errorMessage
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&" // ": error when allocating tmat "//errorMessage
stop 1
endif
......@@ -436,9 +436,9 @@
deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
_2stage_&
&PRECISION " // ": error when deallocating tmat"//errorMessage
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when deallocating tmat"//errorMessage
stop 1
endif
endif
......
......@@ -216,48 +216,48 @@
allocate(tmp1(max_local_cols*nbw), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error when allocating tmp1 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmp1 "//errorMessage
stop 1
endif
allocate(tmp2(max_local_cols*nbw), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
stop 1
endif
allocate(hvb(max_local_rows*nbw), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error when allocating hvb "//errorMessage
&MATH_DATATYPE&
&: error when allocating hvb "//errorMessage
stop 1
endif
allocate(hvm(max_local_rows,nbw), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error when allocating hvm "//errorMessage
&MATH_DATATYPE&
&: error when allocating hvm "//errorMessage
stop 1
endif
successCUDA = cuda_malloc(hvm_dev, (max_local_rows)*nbw* size_of_datatype)
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error in cudaMalloc"
&MATH_DATATYPE&
&: error in cudaMalloc"
stop 1
endif
successCUDA = cuda_malloc(tmp_dev, (max_local_cols)*nbw* size_of_datatype)
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error in cudaMalloc"
&MATH_DATATYPE&
&: error in cudaMalloc"
stop 1
endif
......@@ -266,7 +266,7 @@
!! already existent on GPU
! successCUDA = cuda_malloc(tmat_dev, nbw*nbw* &
!#if REALCASE == 1
! size_of_PRECISION_real)
! size_of_PRECISION_real)
!#endif
!#if COMPLEXCASE == 1
! size_of_PRECISION_complex)
......@@ -274,8 +274,8 @@
!
! if (.not.(successCUDA)) then
! print *,"trans_ev_band_to_full_&
! &MATH_DATATYPE&
! &: error in cudaMalloc"
! &MATH_DATATYPE&
! &: error in cudaMalloc"
! stop 1
! endif
!#endif
......@@ -315,8 +315,8 @@
successCUDA = cuda_memset(hvm_dev, 0, (max_local_rows)*(nbw)* size_of_datatype)
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error in cudaMalloc"
&MATH_DATATYPE&
&: error in cudaMalloc"
stop 1
endif
......@@ -355,12 +355,12 @@
call obj%timer%start("mpi_communication")
call MPI_Bcast(hvb(ns+1), nb-ns, &
#if REALCASE == 1
MPI_REAL_PRECISION,&
MPI_REAL_PRECISION,&
#endif
#if COMPLEXCASE == 1
MPI_COMPLEX_PRECISION, &
#endif
pcol(ncol, nblk, np_cols), mpi_comm_cols, mpierr)
pcol(ncol, nblk, np_cols), mpi_comm_cols, mpierr)
call obj%timer%stop("mpi_communication")
......@@ -399,16 +399,16 @@
! Q = Q - V * T**T * V**T * Q
if (l_rows>0) then
call obj%timer%start("cublas")
call obj%timer%start("cublas")
#if REALCASE == 1
call cublas_PRECISION_GEMM('T', 'N', &
#endif
#if COMPLEXCASE == 1
call cublas_PRECISION_GEMM('C', 'N', &
#endif
n_cols, l_cols, l_rows, ONE, hvm_dev, max_local_rows, &
n_cols, l_cols, l_rows, ONE, hvm_dev, max_local_rows, &
q_dev, ldq , ZERO, tmp_dev, n_cols)
call obj%timer%stop("cublas")
call obj%timer%stop("cublas")
#if REALCASE == 1
#ifdef WITH_MPI
......@@ -485,11 +485,11 @@
! after the mpi_allreduce we have to copy back to the device
! copy back to device
successCUDA = cuda_memcpy(tmp_dev, loc(tmp2), n_cols*l_cols* size_of_datatype, &
cudaMemcpyHostToDevice)
cudaMemcpyHostToDevice)
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error in cudaMemcpy"
&MATH_DATATYPE&
&: error in cudaMemcpy"
stop 1
endif
#else /* WITH_MPI */
......@@ -512,13 +512,13 @@
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error in cudaMemcpy"
&MATH_DATATYPE&
&: error in cudaMemcpy"
stop 1
endif
!#endif /* WITH_MPI */
call obj%timer%start("cublas")
call obj%timer%start("cublas")
#if REALCASE == 1
call cublas_PRECISION_TRMM('L', 'U', 'T', 'N', &
#endif
......@@ -529,7 +529,7 @@
call cublas_PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm_dev, max_local_rows, &
tmp_dev, n_cols, one, q_dev, ldq)
call obj%timer%stop("cublas")
call obj%timer%stop("cublas")
#if REALCASE == 1
! copy to host maybe this can be avoided
......@@ -567,32 +567,32 @@
allocate(tmp1(max_local_cols*cwy_blocking), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&
&: error when allocating tmp1 "//errorMessage
&MATH_DATATYPE&
&: error when allocating tmp1 "//errorMessage
stop 1
endif
allocate(tmp2(max_local_cols*cwy_blocking), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_band_to_full_&
&MATH_DATATYPE&a