Commit cdebde14 authored by Carolin Penke's avatar Carolin Penke
Browse files

skewsymmetric ELPA1

parent 7b58570b
......@@ -79,7 +79,11 @@ function elpa_solve_evp_&
MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*)
#else
MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
MATH_DATATYPE(kind=rck), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#if SKEWSYMMETRIC == 1
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
#endif
#if REALCASE == 1
......@@ -93,17 +97,19 @@ function elpa_solve_evp_&
complex(kind=C_DATATYPE_KIND), allocatable :: tau(:)
complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
complex(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
integer(kind=c_int) :: l_cols, l_rows, l_cols_nev
#endif /* COMPLEXCASE */
logical :: useGPU
integer(kind=c_int) :: skewsymmetric
logical :: isSkewsymmetric
logical :: success
logical :: do_useGPU, do_useGPU_tridiag, &
do_useGPU_solve_tridi, do_useGPU_trans_ev
integer(kind=ik) :: numberOfGPUDevices
integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, mpierr
integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
real(kind=C_DATATYPE_KIND), allocatable :: e(:)
logical :: wantDebug
integer(kind=c_int) :: istat, debug, gpu
......@@ -114,7 +120,8 @@ function elpa_solve_evp_&
logical :: do_tridiag, do_solve, do_trans_ev
integer(kind=ik) :: nrThreads
integer(kind=ik) :: global_index
call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......@@ -196,7 +203,15 @@ function elpa_solve_evp_&
else
useGPU = .false.
endif
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
......@@ -205,10 +220,10 @@ function elpa_solve_evp_&
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
#if COMPLEXCASE == 1
! #if COMPLEXCASE == 1
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif
! #endif
call obj%timer%stop("mpi_communication")
......@@ -369,13 +384,47 @@ function elpa_solve_evp_&
#if COMPLEXCASE == 1
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
#endif
if (isSkewsymmetric) then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols) = 0.0
do i = 1, obj%local_nrows
! global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
if (mod(global_index-1,4) .eq. 0) then
! do nothing
end if
if (mod(global_index-1,4) .eq. 1) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
if (mod(global_index-1,4) .eq. 2) then
q(i,1:obj%local_ncols) = -q(i,1:obj%local_ncols)
end if
if (mod(global_index-1,4) .eq. 3) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = -q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
end do
endif
call obj%timer%start("back")
! In the skew-symmetric case this transforms the real part
call trans_ev_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
if (isSkewsymmetric) then
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
call trans_ev_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
endif
call obj%timer%stop("back")
endif ! do_trans_ev
......
......@@ -110,6 +110,8 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
logical, intent(in) :: useGPU, wantDebug
integer(kind=c_int) :: skewsymmetric
logical :: isSkewsymmetric
MATH_DATATYPE(kind=rck), intent(out) :: tau(na)
#ifdef USE_ASSUMED_SIZE
......@@ -179,6 +181,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
&PRECISION&
&_&
&MATH_DATATYPE
call obj%get("is_skewsymmetric",skewsymmetric,istat)
if (istat .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
if(useGPU) then
gpuString = "_gpu"
else
......@@ -526,10 +535,17 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
v_row(l_row_beg), 1, ONE, uc_p(l_col_beg,my_thread), 1)
if (i/=j) then
call PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
ONE, a_mat(l_row_beg,l_col_beg), lda, v_col(l_col_beg), 1, &
ONE, ur_p(l_row_beg,my_thread), 1)
if (isSkewsymmetric) then
call PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
-ONE, a_mat(l_row_beg,l_col_beg), lda, v_col(l_col_beg), 1, &
ONE, ur_p(l_row_beg,my_thread), 1)
else
call PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
ONE, a_mat(l_row_beg,l_col_beg), lda, v_col(l_col_beg), 1, &
ONE, ur_p(l_row_beg,my_thread), 1)
endif
endif
if (wantDebug) call obj%timer%stop("blas")
endif
......@@ -548,11 +564,17 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
ONE, u_col(l_col_beg), 1)
if (i/=j) then
call PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
ONE, a_mat(l_row_beg,l_col_beg), lda, &
v_col(l_col_beg), 1, &
ONE, u_row(l_row_beg), 1)
if (isSkewsymmetric) then
call PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
-ONE, a_mat(l_row_beg,l_col_beg), lda, &
v_col(l_col_beg), 1, &
ONE, u_row(l_row_beg), 1)
else
call PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
ONE, a_mat(l_row_beg,l_col_beg), lda, &
v_col(l_col_beg), 1, &
ONE, u_row(l_row_beg), 1)
endif
endif
if (wantDebug) call obj%timer%stop("blas")
endif ! not useGPU
......@@ -614,11 +636,17 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
a_offset = ((l_row_beg-1) + (l_col_beg - 1) * lda) * &
size_of_datatype
call cublas_PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
ONE, a_dev + a_offset, lda, &
v_col_dev + (l_col_beg - 1) * size_of_datatype,1, &
ONE, u_row_dev + (l_row_beg - 1) * size_of_datatype, 1)
if (isSkewsymmetric) then
call cublas_PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
-ONE, a_dev + a_offset, lda, &
v_col_dev + (l_col_beg - 1) * size_of_datatype,1, &
ONE, u_row_dev + (l_row_beg - 1) * size_of_datatype, 1)
else
call cublas_PRECISION_GEMV('N', l_row_end-l_row_beg+1, l_col_end-l_col_beg+1, &
ONE, a_dev + a_offset, lda, &
v_col_dev + (l_col_beg - 1) * size_of_datatype,1, &
ONE, u_row_dev + (l_row_beg - 1) * size_of_datatype, 1)
endif
enddo
end if !multiplication as one block / per stripes
......@@ -696,13 +724,21 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
u_col = tmp
#endif /* WITH_MPI */
endif
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, u_col, ubound(u_col,dim=1), mpi_comm_cols, u_row, ubound(u_row,dim=1), &
mpi_comm_rows, 1, istep-1, 1, nblk, max_threads)
if (isSkewsymmetric) then
call elpa_transpose_vectors_ss_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, u_col, ubound(u_col,dim=1), mpi_comm_cols, u_row, ubound(u_row,dim=1), &
mpi_comm_rows, 1, istep-1, 1, nblk, max_threads)
else
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, u_col, ubound(u_col,dim=1), mpi_comm_cols, u_row, ubound(u_row,dim=1), &
mpi_comm_rows, 1, istep-1, 1, nblk, max_threads)
endif
! calculate u**T * v (same as v**T * (A + VU**T + UV**T) * v )
x = 0
......@@ -828,7 +864,11 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
+ dot_product(vu_stored_rows(l_rows,1:2*n_stored_vecs),uv_stored_cols(l_cols,1:2*n_stored_vecs))
end if
#if REALCASE == 1
d_vec(istep-1) = a_mat(l_rows,l_cols)
if (isSkewsymmetric) then
d_vec(istep-1) = 0.0_rk
else
d_vec(istep-1) = a_mat(l_rows,l_cols)
endif
#endif
#if COMPLEXCASE == 1
d_vec(istep-1) = real(a_mat(l_rows,l_cols),kind=rk)
......@@ -920,7 +960,11 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
successCUDA = cuda_memcpy(loc(d_vec(1)), a_dev, 1 * size_of_datatype, cudaMemcpyDeviceToHost)
check_memcpy_cuda("tridiag: a_dev 8", successCUDA)
else !useGPU
d_vec(1) = a_mat(1,1)
if (isSkewsymmetric) then
d_vec(1) = 0.0_rk
else
d_vec(1) = a_mat(1,1)
endif
endif !useGPU
endif
#endif
......
......@@ -496,7 +496,7 @@
#if REALCASE == 1
if (isSkewsymmetric) then
d(istep) = 0
d(istep) = 0.0_rk
else
d(istep) = ab(1,na_s-n_off)
endif
......
......@@ -217,7 +217,7 @@ program test
call e_complex%set("gpu", 0)
assert_elpa_ok(e_complex%setup())
call e_complex%set("solver", elpa_solver_2stage, error)
call e_complex%set("solver", elpa_solver_1stage, error)
call e_complex%timer_start("eigenvectors: brute force ")
call e_complex%eigenvectors(a_complex, ev_complex, z_complex, error)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment