Commit fcb42f7a authored by Carolin Penke's avatar Carolin Penke
Browse files

skew-symmetry as elpa object property instead of preprocessor statements

parent 6e686b25
......@@ -123,6 +123,8 @@
real(kind=rk) :: eps
#endif
logical, intent(in) :: useGPU
integer(kind=c_int) :: skewsymmetric
logical :: isSkewsymmetric
character(20) :: gpuString
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
......@@ -188,6 +190,13 @@
logical :: useGPU_reduction_lower_block_to_tridiagonal
integer(kind=ik), intent(in) :: max_threads
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
if(useGPU) then
gpuString = "_gpu"
else
......@@ -1156,14 +1165,18 @@
if (i==0) cycle
lre = min(l_rows,i*l_rows_tile)
call obj%timer%start("blas")
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a_mat(1,lcs), lda, &
if (isSkewsymmetric) then
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a_mat(1,lcs), lda, &
umcCPU(lcs,n_cols+1), ubound(umcCPU,dim=1), &
#if SKEWSYMMETRIC == 1
-ONE, &
#else
ONE, &
#endif
-ONE, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1))
else
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a_mat(1,lcs), lda, &
umcCPU(lcs,n_cols+1), ubound(umcCPU,dim=1), &
ONE, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1))
endif
call obj%timer%stop("blas")
endif ! useGPU
enddo ! i=0,(istep*nbw-1)/tile_size
......@@ -1364,17 +1377,21 @@
endif ! useGPU
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
call ssymm_matrix_allreduce_&
#else
call symm_matrix_allreduce_&
#endif
if (isSkewsymmetric) then
call ssymm_matrix_allreduce_&
&PRECISION &
(obj, n_cols,vav, nbw, nbw ,mpi_comm_cols)
else
call symm_matrix_allreduce_&
&PRECISION &
(obj, n_cols,vav, nbw, nbw ,mpi_comm_cols)
endif
#endif
#if COMPLEXCASE == 1
call herm_matrix_allreduce_&
#endif
&PRECISION &
(obj, n_cols,vav, nbw, nbw ,mpi_comm_cols)
#endif
if (useGPU) then
successCUDA = cuda_memcpy(vav_dev, loc(vav(1,1)), nbw*nbw*size_of_datatype,cudaMemcpyHostToDevice)
......@@ -1445,37 +1462,48 @@
endif
else ! useGPU
call obj%timer%start("blas")
call PRECISION_GEMM('N', 'N', l_cols, n_cols, n_cols, &
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
if (isSkewsymmetric) then
call PRECISION_GEMM('N', 'N', l_cols, n_cols, n_cols, &
0.5_rk, &
#else
-0.5_rk, &
#endif
#endif
umcCPU(1,n_cols+1), ubound(umcCPU,dim=1), vav, &
ubound(vav,dim=1), ONE, umcCPU, ubound(umcCPU,dim=1))
else
call PRECISION_GEMM('N', 'N', l_cols, n_cols, n_cols, &
-0.5_rk, &
umcCPU(1,n_cols+1), ubound(umcCPU,dim=1), vav, &
ubound(vav,dim=1), ONE, umcCPU, ubound(umcCPU,dim=1))
endif
#endif
#if COMPLEXCASE == 1
call PRECISION_GEMM('N', 'N', l_cols, n_cols, n_cols, &
(-0.5_rk, 0.0_rk), &
#endif
umcCPU(1,n_cols+1), ubound(umcCPU,dim=1), vav, &
ubound(vav,dim=1), ONE, umcCPU, ubound(umcCPU,dim=1))
#endif
call obj%timer%stop("blas")
! Transpose umc -> umr (stored in vmr, second half)
#if SKEWSYMMETRIC == 1
call elpa_transpose_vectors_ss_&
#else
call elpa_transpose_vectors_&
#endif
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1), mpi_comm_rows, &
1, istep*nbw, n_cols, nblk, max_threads)
if (isSkewsymmetric) then
call elpa_transpose_vectors_ss_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1), mpi_comm_rows, &
1, istep*nbw, n_cols, nblk, max_threads)
else
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1), mpi_comm_rows, &
1, istep*nbw, n_cols, nblk, max_threads)
endif
endif ! useGPU
! A = A - V*U**T - U*V**T
#ifdef WITH_OPENMP
......
......@@ -72,6 +72,7 @@
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
logical :: useGPU
logical :: isSkewsymmetric
#if REALCASE == 1
logical :: useQR
logical :: useQRActual
......@@ -83,7 +84,7 @@
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,*)
#else
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
#ifdef SKEWSYMMETRIC == 1
#if SKEWSYMMETRIC == 1
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
......@@ -111,7 +112,7 @@
integer(kind=c_int) :: i
logical :: success, successCUDA
logical :: wantDebug
integer(kind=c_int) :: istat, gpu, debug, qr
integer(kind=c_int) :: istat, gpu, skewsymmetric, debug, qr
character(200) :: errorMessage
logical :: do_useGPU, do_useGPU_bandred, &
do_useGPU_tridiag_band, do_useGPU_solve_tridi, &
......@@ -130,9 +131,9 @@
do_trans_to_band, do_trans_to_full
integer(kind=ik) :: nrThreads
#if SKEWSYMMETRIC ==1
! #if SKEWSYMMETRIC ==1
integer(kind=ik) :: global_index
#endif
! #endif
#if REALCASE == 1
#undef GPU_KERNEL
#undef GENERIC_KERNEL
......@@ -235,6 +236,14 @@
print *,"Problem getting option. Aborting..."
stop
endif
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
! GPU settings
call obj%get("gpu", gpu,error)
......@@ -672,31 +681,31 @@
stop 1
endif
#endif
#if SKEWSYMMETRIC == 1
if (isSkewsymmetric) then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols) = 0.0
do i = 1, obj%local_nrows
! global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
if (mod(global_index-1,4) .eq. 0) then
! do nothing
end if
if (mod(global_index-1,4) .eq. 1) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
if (mod(global_index-1,4) .eq. 2) then
q(i,1:obj%local_ncols) = -q(i,1:obj%local_ncols)
end if
if (mod(global_index-1,4) .eq. 3) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = -q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
end do
#endif
q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols) = 0.0
do i = 1, obj%local_nrows
! global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
if (mod(global_index-1,4) .eq. 0) then
! do nothing
end if
if (mod(global_index-1,4) .eq. 1) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
if (mod(global_index-1,4) .eq. 2) then
q(i,1:obj%local_ncols) = -q(i,1:obj%local_ncols)
end if
if (mod(global_index-1,4) .eq. 3) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = -q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
end do
endif
! Backtransform stage 1
call obj%timer%start("trans_ev_to_band")
......@@ -709,18 +718,18 @@
q_dev, &
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU_trans_ev_tridi_to_band, &
nrThreads, success=success, kernel=kernel)
#if SKEWSYMMETRIC == 1
if (isSkewsymmetric) then
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
call trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), &
q_dev, &
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU_trans_ev_tridi_to_band, &
nrThreads, success=success, kernel=kernel)
#endif
call trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), &
q_dev, &
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU_trans_ev_tridi_to_band, &
nrThreads, success=success, kernel=kernel)
endif
call obj%timer%stop("trans_ev_to_band")
if (.not.(success)) return
......@@ -789,22 +798,22 @@
, useQRActual &
#endif
)
#if SKEWSYMMETRIC == 1
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_band_to_full_ acting on the n x 2n matrix.
call trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, a, &
a_dev, lda, tmat, tmat_dev, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), &
q_dev, &
ldq, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev_band_to_full &
if (isSkewsymmetric) then
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_band_to_full_ acting on the n x 2n matrix.
call trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, a, &
a_dev, lda, tmat, tmat_dev, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), &
q_dev, &
ldq, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev_band_to_full &
#if REALCASE == 1
, useQRActual &
#endif
)
, useQRActual &
#endif
)
endif
deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
......
......@@ -96,6 +96,8 @@
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
logical, intent(in) :: useGPU, wantDebug
integer(kind=c_int) :: skewsymmetric
logical :: isSkewsymmetric
integer(kind=ik), intent(in) :: na, nb, nblk, lda, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=rck), intent(in) :: a_mat(lda,*)
......@@ -136,6 +138,13 @@
integer(kind=ik) :: startAddr
#endif
call obj%get("is_skewsymmetric",skewsymmetric,istat)
if (istat .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
if(useGPU) then
gpuString = "_gpu"
else
......@@ -486,11 +495,11 @@
#endif
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
d(istep) = 0
#else
d(istep) = ab(1,na_s-n_off)
#endif
if (isSkewsymmetric) then
d(istep) = 0
else
d(istep) = ab(1,na_s-n_off)
endif
e(istep) = ab(2,na_s-n_off)
#endif
#if COMPLEXCASE == 1
......@@ -500,12 +509,13 @@
if (istep == na-1) then
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
d(na) = 0
#else
d(na) = ab(1,na_s+1-n_off)
#endif
if (isSkewsymmetric) then
d(istep) = 0
else
d(na) = ab(1,na_s+1-n_off)
endif
#endif
#if COMPLEXCASE == 1
d(na) = real(ab(1,na_s+1-n_off),kind=rk)
#endif
......@@ -863,12 +873,12 @@
if (wantDebug) call obj%timer%start("blas")
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
call dssmv('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
#else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
#endif
if (isSkewsymmetric) then
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
call dssmv('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
endif
#endif
#if COMPLEXCASE == 1
call PRECISION_HEMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd,1)
......@@ -903,12 +913,12 @@
! Normal matrix multiply
if (wantDebug) call obj%timer%start("blas")
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
call dssmv('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
#else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
#endif
if (isSkewsymmetric) then
! call PRECISION_SSMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
call dssmv('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
else
call PRECISION_SYMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
endif
#endif
#if COMPLEXCASE == 1
call PRECISION_HEMV('L', nc, tau, ab(1,ns), 2*nb-1, hv, 1, ZERO, hd, 1)
......@@ -979,28 +989,31 @@
! Transform diagonal block
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
#else
x = dot_product(hv(1:nc),hd(1:nc))*tau
#endif
if (.NOT. isSkewsymmetric) then
x = dot_product(hv(1:nc),hd(1:nc))*tau
endif
#endif
#if COMPLEXCASE == 1
x = dot_product(hv(1:nc),hd(1:nc))*conjg(tau)
#endif
#if SKEWSYMMETRIC == 1
#else
hd(1:nc) = hd(1:nc) - 0.5_rk*x*hv(1:nc)
#if REALCASE == 1
if (.NOT. isSkewsymmetric) then
#endif
hd(1:nc) = hd(1:nc) - 0.5_rk*x*hv(1:nc)
#if REALCASE == 1
endif
#endif
if (my_pe>0 .and. iblk==1) then
! The first column of the diagonal block has to be send to the previous PE
! Calculate first column only ...
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
ab(1:nc,ns) = ab(1:nc,ns) - hd(1:nc)*hv(1) + hv(1:nc)*hd(1)
#else
ab(1:nc,ns) = ab(1:nc,ns) - hd(1:nc)*hv(1) - hv(1:nc)*hd(1)
#endif
if (isSkewsymmetric) then
ab(1:nc,ns) = ab(1:nc,ns) - hd(1:nc)*hv(1) + hv(1:nc)*hd(1)
else
ab(1:nc,ns) = ab(1:nc,ns) - hd(1:nc)*hv(1) - hv(1:nc)*hd(1)
endif
#endif
#if COMPLEXCASE == 1
ab(1:nc,ns) = ab(1:nc,ns) - hd(1:nc)*conjg(hv(1)) - hv(1:nc)*conjg(hd(1))
......@@ -1025,12 +1038,12 @@
! ... and calculate remaining columns with rank-2 update
if (wantDebug) call obj%timer%start("blas")
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
if (isSkewsymmetric) then
! if (nc>1) call PRECISION_SSR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
if (nc>1) call dssr2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
#else
if (nc>1) call PRECISION_SYR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
#endif
if (nc>1) call dssr2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
else
if (nc>1) call PRECISION_SYR2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
endif
#endif
#if COMPLEXCASE == 1
if (nc>1) call PRECISION_HER2('L', nc-1, -ONE, hd(2), 1, hv(2), 1, ab(1,ns+1), 2*nb-1)
......@@ -1041,12 +1054,12 @@
! No need to send, just a rank-2 update
if (wantDebug) call obj%timer%start("blas")
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
call dssr2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
#else
call PRECISION_SYR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
#endif
if (isSkewsymmetric) then
! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
call dssr2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
else
call PRECISION_SYR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
endif
#endif
#if COMPLEXCASE == 1
call PRECISION_HER2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment