Commit bb63bd9e authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'master' into ELPA_GPU

parents 9a1c491b 0eacaed7
......@@ -177,6 +177,8 @@ module ELPA1_compute
#ifdef WITH_OPENMP
real(kind=rk), allocatable :: ur_p(:,:), uc_p(:,:)
#endif
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("tridiag_real")
......@@ -203,17 +205,51 @@ module ELPA1_compute
max_local_rows = max_blocks_row*nblk
max_local_cols = max_blocks_col*nblk
allocate(tmp(MAX(max_local_rows,max_local_cols)))
allocate(vr(max_local_rows+1))
allocate(ur(max_local_rows))
allocate(vc(max_local_cols))
allocate(uc(max_local_cols))
allocate(tmp(MAX(max_local_rows,max_local_cols)), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating tmp "//errorMessage
stop
endif
allocate(vr(max_local_rows+1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating vr "//errorMessage
stop
endif
allocate(ur(max_local_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating ur "//errorMessage
stop
endif
allocate(vc(max_local_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating vc "//errorMessage
stop
endif
allocate(uc(max_local_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating uc "//errorMessage
stop
endif
#ifdef WITH_OPENMP
max_threads = omp_get_max_threads()
allocate(ur_p(max_local_rows,0:max_threads-1))
allocate(uc_p(max_local_cols,0:max_threads-1))
allocate(ur_p(max_local_rows,0:max_threads-1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating ur_p "//errorMessage
stop
endif
allocate(uc_p(max_local_cols,0:max_threads-1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating uc_p "//errorMessage
stop
endif
#endif
tmp = 0
......@@ -222,8 +258,17 @@ module ELPA1_compute
vc = 0
uc = 0
allocate(vur(max_local_rows,2*max_stored_rows))
allocate(uvc(max_local_cols,2*max_stored_rows))
allocate(vur(max_local_rows,2*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating vur "//errorMessage
stop
endif
allocate(uvc(max_local_cols,2*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating uvc "//errorMessage
stop
endif
d(:) = 0
e(:) = 0
......@@ -487,11 +532,20 @@ module ELPA1_compute
if (my_prow==prow(1, nblk, np_rows) .and. my_pcol==pcol(2, nblk, np_cols)) e(1) = a(1,l_cols) ! use last l_cols value of loop above
if (my_prow==prow(1, nblk, np_rows) .and. my_pcol==pcol(1, nblk, np_cols)) d(1) = a(1,1)
deallocate(tmp, vr, ur, vc, uc, vur, uvc)
deallocate(tmp, vr, ur, vc, uc, vur, uvc, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when deallocating uvc "//errorMessage
stop
endif
! distribute the arrays d and e to all processors
allocate(tmp(na))
allocate(tmp(na), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when allocating tmp "//errorMessage
stop
endif
#ifdef DOUBLE_PRECISION_REAL
tmp = d
call mpi_allreduce(tmp, d, na, MPI_REAL8, MPI_SUM, mpi_comm_rows, mpierr)
......@@ -511,7 +565,12 @@ module ELPA1_compute
tmp = e
call mpi_allreduce(tmp, e, na, MPI_REAL4, MPI_SUM, mpi_comm_cols, mpierr)
#endif
deallocate(tmp)
deallocate(tmp, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_real: error when deallocating tmp "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("tridiag_real")
#endif
......@@ -575,7 +634,8 @@ module ELPA1_compute
real(kind=rk), allocatable :: tmp1(:), tmp2(:), hvb(:), hvm(:,:)
real(kind=rk), allocatable :: tmat(:,:), h1(:), h2(:)
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("trans_ev_real")
#endif
......@@ -595,13 +655,47 @@ module ELPA1_compute
max_stored_rows = (63/nblk+1)*nblk
allocate(tmat(max_stored_rows,max_stored_rows))
allocate(h1(max_stored_rows*max_stored_rows))
allocate(h2(max_stored_rows*max_stored_rows))
allocate(tmp1(max_local_cols*max_stored_rows))
allocate(tmp2(max_local_cols*max_stored_rows))
allocate(hvb(max_local_rows*nblk))
allocate(hvm(max_local_rows,max_stored_rows))
allocate(tmat(max_stored_rows,max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating tmat "//errorMessage
stop
endif
allocate(h1(max_stored_rows*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating h1 "//errorMessage
stop
endif
allocate(h2(max_stored_rows*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating h2 "//errorMessage
stop
endif
allocate(tmp1(max_local_cols*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating tmp1 "//errorMessage
stop
endif
allocate(tmp2(max_local_cols*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating tmp2 "//errorMessage
stop
endif
allocate(hvb(max_local_rows*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating hvn "//errorMessage
stop
endif
allocate(hvm(max_local_rows,max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when allocating hvm "//errorMessage
stop
endif
hvm = 0 ! Must be set to 0 !!!
hvb = 0 ! Safety only
......@@ -724,7 +818,11 @@ module ELPA1_compute
enddo
deallocate(tmat, h1, h2, tmp1, tmp2, hvb, hvm)
deallocate(tmat, h1, h2, tmp1, tmp2, hvb, hvm, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_real: error when deallocating hvm "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("trans_ev_real")
......@@ -806,7 +904,8 @@ module ELPA1_compute
logical :: a_lower, a_upper, c_lower, c_upper
real(kind=rk), allocatable :: aux_mat(:,:), aux_bc(:), tmp1(:,:), tmp2(:,:)
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mult_at_b_real")
#endif
......@@ -850,10 +949,29 @@ module ELPA1_compute
nblk_mult = (63/nblk+1)*nblk
endif
allocate(aux_mat(l_rows,nblk_mult))
allocate(aux_bc(l_rows*nblk))
allocate(lrs_save(nblk))
allocate(lre_save(nblk))
allocate(aux_mat(l_rows,nblk_mult), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when allocating aux_mat "//errorMessage
stop
endif
allocate(aux_bc(l_rows*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when allocating aux_bc "//errorMessage
stop
endif
allocate(lrs_save(nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when allocating lrs_save "//errorMessage
stop
endif
allocate(lre_save(nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when allocating lre_save "//errorMessage
stop
endif
a_lower = .false.
a_upper = .false.
......@@ -951,7 +1069,12 @@ module ELPA1_compute
if (c_lower) lce = MIN(local_index(gcol, my_pcol, np_cols, nblk, -1),l_cols)
if (lcs<=lce) then
allocate(tmp1(nstor,lcs:lce),tmp2(nstor,lcs:lce))
allocate(tmp1(nstor,lcs:lce),tmp2(nstor,lcs:lce), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when allocating tmp1 "//errorMessage
stop
endif
if (lrs<=lre) then
#ifdef DOUBLE_PRECISION_REAL
call dgemm('T', 'N', nstor, lce-lcs+1, lre-lrs+1, 1.0_rk, aux_mat(lrs,1), ubound(aux_mat,dim=1), &
......@@ -974,7 +1097,12 @@ module ELPA1_compute
! Put the result into C
if (my_prow==np) c(nr_done+1:nr_done+nstor,lcs:lce) = tmp2(1:nstor,lcs:lce)
deallocate(tmp1,tmp2)
deallocate(tmp1,tmp2, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when deallocating tmp1 "//errorMessage
stop
endif
endif
nr_done = nr_done+nstor
......@@ -984,7 +1112,12 @@ module ELPA1_compute
enddo
enddo
deallocate(aux_mat, aux_bc, lrs_save, lre_save)
deallocate(aux_mat, aux_bc, lrs_save, lre_save, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_at_b_real: error when deallocating aux_mat "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mult_at_b_real")
#endif
......@@ -1083,6 +1216,8 @@ module ELPA1_compute
complex(kind=ck), allocatable :: ur_p(:,:), uc_p(:,:)
#endif
real(kind=rk), allocatable :: tmpr(:)
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("tridiag_complex")
......@@ -1109,17 +1244,50 @@ module ELPA1_compute
max_local_rows = max_blocks_row*nblk
max_local_cols = max_blocks_col*nblk
allocate(tmp(MAX(max_local_rows,max_local_cols)))
allocate(vr(max_local_rows+1))
allocate(ur(max_local_rows))
allocate(vc(max_local_cols))
allocate(uc(max_local_cols))
allocate(tmp(MAX(max_local_rows,max_local_cols)), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating tmp "//errorMessage
stop
endif
allocate(vr(max_local_rows+1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating vr "//errorMessage
stop
endif
allocate(ur(max_local_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating ur "//errorMessage
stop
endif
allocate(vc(max_local_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating vc "//errorMessage
stop
endif
allocate(uc(max_local_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating uc "//errorMessage
stop
endif
#ifdef WITH_OPENMP
max_threads = omp_get_max_threads()
allocate(ur_p(max_local_rows,0:max_threads-1))
allocate(uc_p(max_local_cols,0:max_threads-1))
allocate(ur_p(max_local_rows,0:max_threads-1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating ur_p "//errorMessage
stop
endif
allocate(uc_p(max_local_cols,0:max_threads-1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating uc_p "//errorMessage
stop
endif
#endif
tmp = 0
......@@ -1128,8 +1296,17 @@ module ELPA1_compute
vc = 0
uc = 0
allocate(vur(max_local_rows,2*max_stored_rows))
allocate(uvc(max_local_cols,2*max_stored_rows))
allocate(vur(max_local_rows,2*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating vur "//errorMessage
stop
endif
allocate(uvc(max_local_cols,2*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating uvc "//errorMessage
stop
endif
d(:) = 0
e(:) = 0
......@@ -1423,11 +1600,19 @@ module ELPA1_compute
if (my_prow==prow(1, nblk, np_rows) .and. my_pcol==pcol(1, nblk, np_cols)) d(1) = a(1,1)
deallocate(tmp, vr, ur, vc, uc, vur, uvc)
deallocate(tmp, vr, ur, vc, uc, vur, uvc, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when deallocating tmp "//errorMessage
stop
endif
! distribute the arrays d and e to all processors
allocate(tmpr(na))
allocate(tmpr(na), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating tmpr "//errorMessage
stop
endif
#ifdef DOUBLE_PRECISION_COMPLEX
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL8, MPI_SUM, mpi_comm_rows, mpierr)
......@@ -1448,7 +1633,12 @@ module ELPA1_compute
call mpi_allreduce(tmpr, e, na, MPI_REAL4, MPI_SUM, mpi_comm_cols, mpierr)
#endif
deallocate(tmpr)
deallocate(tmpr, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when deallocating tmpr "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("tridiag_complex")
#endif
......@@ -1512,7 +1702,8 @@ module ELPA1_compute
complex(kind=ck), allocatable :: tmp1(:), tmp2(:), hvb(:), hvm(:,:)
complex(kind=ck), allocatable :: tmat(:,:), h1(:), h2(:)
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("trans_ev_complex")
#endif
......@@ -1531,13 +1722,47 @@ module ELPA1_compute
max_stored_rows = (63/nblk+1)*nblk
allocate(tmat(max_stored_rows,max_stored_rows))
allocate(h1(max_stored_rows*max_stored_rows))
allocate(h2(max_stored_rows*max_stored_rows))
allocate(tmp1(max_local_cols*max_stored_rows))
allocate(tmp2(max_local_cols*max_stored_rows))
allocate(hvb(max_local_rows*nblk))
allocate(hvm(max_local_rows,max_stored_rows))
allocate(tmat(max_stored_rows,max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating tmat "//errorMessage
stop
endif
allocate(h1(max_stored_rows*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating h1 "//errorMessage
stop
endif
allocate(h2(max_stored_rows*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating h2 "//errorMessage
stop
endif
allocate(tmp1(max_local_cols*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating tmp1 "//errorMessage
stop
endif
allocate(tmp2(max_local_cols*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating tmp2 "//errorMessage
stop
endif
allocate(hvb(max_local_rows*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating hvb "//errorMessage
stop
endif
allocate(hvm(max_local_rows,max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating hvm "//errorMessage
stop
endif
hvm = 0 ! Must be set to 0 !!!
hvb = 0 ! Safety only
......@@ -1662,7 +1887,11 @@ module ELPA1_compute
enddo
deallocate(tmat, h1, h2, tmp1, tmp2, hvb, hvm)
deallocate(tmat, h1, h2, tmp1, tmp2, hvb, hvm, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when deallocating hvb "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("trans_ev_complex")
......@@ -1744,6 +1973,8 @@ module ELPA1_compute
logical :: a_lower, a_upper, c_lower, c_upper
complex(kind=ck), allocatable :: aux_mat(:,:), aux_bc(:), tmp1(:,:), tmp2(:,:)
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mult_ah_b_complex")
......@@ -1789,10 +2020,29 @@ module ELPA1_compute
nblk_mult = (63/nblk+1)*nblk
endif
allocate(aux_mat(l_rows,nblk_mult))
allocate(aux_bc(l_rows*nblk))
allocate(lrs_save(nblk))
allocate(lre_save(nblk))
allocate(aux_mat(l_rows,nblk_mult), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when allocating aux_mat "//errorMessage
stop
endif
allocate(aux_bc(l_rows*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when allocating aux_bc "//errorMessage
stop
endif
allocate(lrs_save(nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when allocating lrs_save "//errorMessage
stop
endif
allocate(lre_save(nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when allocating lre_save "//errorMessage
stop
endif
a_lower = .false.
a_upper = .false.
......@@ -1890,7 +2140,12 @@ module ELPA1_compute
if (c_lower) lce = MIN(local_index(gcol, my_pcol, np_cols, nblk, -1),l_cols)
if (lcs<=lce) then
allocate(tmp1(nstor,lcs:lce),tmp2(nstor,lcs:lce))
allocate(tmp1(nstor,lcs:lce),tmp2(nstor,lcs:lce), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when allocating tmp1 "//errorMessage
stop
endif
if (lrs<=lre) then
#ifdef DOUBLE_PRECISION_COMPLEX
call zgemm('C', 'N', nstor, lce-lcs+1, lre-lrs+1, (1.0_rk,0.0_rk), aux_mat(lrs,1), ubound(aux_mat,dim=1), &
......@@ -1912,7 +2167,12 @@ module ELPA1_compute
! Put the result into C
if (my_prow==np) c(nr_done+1:nr_done+nstor,lcs:lce) = tmp2(1:nstor,lcs:lce)
deallocate(tmp1,tmp2)
deallocate(tmp1,tmp2, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when deallocating tmp1 "//errorMessage
stop
endif
endif
nr_done = nr_done+nstor
......@@ -1922,7 +2182,12 @@ module ELPA1_compute
enddo
enddo
deallocate(aux_mat, aux_bc, lrs_save, lre_save)
deallocate(aux_mat, aux_bc, lrs_save, lre_save, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"mult_ah_b_complex: error when deallocating aux_mat "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mult_ah_b_complex")
#endif
......@@ -1951,6 +2216,9 @@ module ELPA1_compute
logical, intent(in) :: wantDebug
logical, intent(out) :: success
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_tridi")
#endif
......@@ -1971,7 +2239,11 @@ module ELPA1_compute
! Get the limits of the subdivisons, each subdivison has as many cols
! as fit on the respective processor column
allocate(limits(0:np_cols))
allocate(limits(0:np_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_tridi: error when allocating limits "//errorMessage
stop
endif
limits(0) = 0
do np=0,np_cols-1
......@@ -2022,7 +2294,12 @@ module ELPA1_compute
! If there is only 1 processor column, we are done
if (np_cols==1) then
deallocate(limits)
deallocate(limits, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_tridi: error when deallocating limits "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi")
#endif
......@@ -2033,8 +2310,17 @@ module ELPA1_compute
! Dense distribution scheme: