Commit 09005431 authored by Andreas Marek's avatar Andreas Marek
Browse files

Remove hh_transform, symm|herm_matrix_allreduce macros from precision_macros.h

Due to a bug in the Intel compiler some lines could not be
generalised.
parent 889616ed
......@@ -229,13 +229,18 @@
!-------------------------------------------------------------------------------
#endif
#if REALCASE == 1
subroutine hh_transform_real_&
#endif
#if COMPLEXCASE == 1
subroutine hh_transform_complex_&
#endif
&PRECISION &
(alpha, xnorm_sq, xf, tau)
#if REALCASE == 1
subroutine hh_transform_real_PRECISION(alpha, xnorm_sq, xf, tau)
! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
#endif
#if COMPLEXCASE == 1
subroutine hh_transform_complex_PRECISION(alpha, xnorm_sq, xf, tau)
! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
#endif
! and returns the factor xf by which x has to be scaled.
......@@ -265,12 +270,9 @@
real(kind=REAL_DATATYPE) :: BETA
#if REALCASE == 1
call timer%start("hh_transform_real" // PRECISION_SUFFIX )
#endif
#if COMPLEXCASE == 1
call timer%start("hh_transform_complex" // PRECISION_SUFFIX )
#endif
call timer%start("hh_tranform_&
&MATH_DATATYPE&
&" // PRECISION_SUFFIX )
#if COMPLEXCASE == 1
ALPHR = real( ALPHA, kind=REAL_DATATYPE )
......@@ -331,16 +333,14 @@
ALPHA = BETA
endif
#if REALCASE == 1
call timer%stop("hh_transform_real" // PRECISION_SUFFIX )
#endif
#if COMPLEXCASE == 1
call timer%stop("hh_transform_complex" // PRECISION_SUFFIX )
#endif
call timer%stop("hh_transform_&
&MATH_DATATYPE&
&" // PRECISION_SUFFIX )
#if REALCASE == 1
end subroutine hh_transform_real_PRECISION
end subroutine hh_transform_real_&
#endif
#if COMPLEXCASE == 1
end subroutine hh_transform_complex_PRECISION
end subroutine hh_transform_complex_&
#endif
&PRECISION
......@@ -488,11 +488,13 @@
! Householder transformation
#if REALCASE == 1
call hh_transform_real_PRECISION(vrl, vnorm2, xf, tau(istep))
call hh_transform_real_&
#endif
#if COMPLEXCASE == 1
call hh_transform_complex_PRECISION(vrl, vnorm2, xf, tau(istep))
call hh_transform_complex_&
#endif
&PRECISION &
(vrl, vnorm2, xf, tau(istep))
! Scale v_row and store Householder vector for back transformation
v_row(1:l_rows) = v_row(1:l_rows) * xf
......@@ -1053,7 +1055,14 @@
else !useGPU
vrl = a_mat(1,l_cols)
endif !useGPU
call hh_transform_complex_PRECISION(vrl, CONST_REAL_0_0, xf, tau(2))
#if REALCASE == 1
call hh_transform_real_&
#endif
#if COMPLEXCASE == 1
call hh_transform_complex_&
#endif
&PRECISION &
(vrl, CONST_REAL_0_0, xf, tau(2))
e_vec(1) = vrl
a_mat(1,l_cols) = 1. ! for consistency only
......
......@@ -771,11 +771,13 @@
! Householder transformation
#if REALCASE == 1
call hh_transform_real_PRECISION(vrl, vnorm2, xf, tau)
call hh_transform_real_&
#endif
#if COMPLEXCASE == 1
call hh_transform_complex_PRECISION(vrl, vnorm2, xf, tau)
call hh_transform_complex_&
#endif
&PRECISION &
(vrl, vnorm2, xf, tau)
! Scale vr and store Householder vector for back transformation
vr(1:lr) = vr(1:lr) * xf
......@@ -1034,11 +1036,13 @@
#endif
call timer%stop("blas")
#if REALCASE == 1
call symm_matrix_allreduce_PRECISION(n_cols,vav, nbw, nbw,mpi_comm_rows)
call symm_matrix_allreduce_&
#endif
#if COMPLEXCASE == 1
call herm_matrix_allreduce_PRECISION(n_cols,vav, nbw,nbw,mpi_comm_rows)
call herm_matrix_allreduce_&
#endif
&PRECISION &
(n_cols,vav, nbw, nbw,mpi_comm_rows)
! Calculate triangular matrix T for block Householder Transformation
call timer%start("blas")
do lc=n_cols,1,-1
......@@ -1417,7 +1421,9 @@
stop
endif
call symm_matrix_allreduce_PRECISION(n_cols,vav, nbw,nbw,mpi_comm_cols)
call symm_matrix_allreduce_&
&PRECISION &
(n_cols,vav, nbw,nbw,mpi_comm_cols)
successCUDA = cuda_memcpy(vav_dev, loc(vav(1,1)), nbw*nbw*size_of_PRECISION_real,cudaMemcpyHostToDevice)
if (.not.(successCUDA)) then
......@@ -1526,7 +1532,9 @@
call PRECISION_TRMM('Right', 'Upper', 'Trans', 'Nonunit', n_cols, n_cols, CONST_1_0, tmat(1,1,istep), &
ubound(tmat,dim=1), vav, ubound(vav,dim=1))
call timer%stop("blas")
call symm_matrix_allreduce_PRECISION(n_cols,vav, nbw, nbw ,mpi_comm_cols)
call symm_matrix_allreduce_&
&PRECISION &
(n_cols,vav, nbw, nbw ,mpi_comm_cols)
! U = U - 0.5 * V * VAV
call timer%start("blas")
......@@ -1695,7 +1703,9 @@
stop
endif
call herm_matrix_allreduce_PRECISION(n_cols,vav, nbw, nbw,mpi_comm_cols)
call herm_matrix_allreduce_&
&PRECISION &
(n_cols,vav, nbw, nbw,mpi_comm_cols)
successCUDA = cuda_memcpy(vav_dev,loc(vav(1,1)),nbw*nbw*size_of_PRECISION_complex,cudaMemcpyHostToDevice)
if (.not.(successCUDA)) then
......@@ -1709,7 +1719,9 @@
call PRECISION_TRMM('Right', 'Upper', 'C', 'Nonunit', n_cols, n_cols, CONE, tmat(1,1,istep), &
ubound(tmat,dim=1), vav, ubound(vav,dim=1))
call timer%stop("blas")
call herm_matrix_allreduce_PRECISION(n_cols,vav,nbw,nbw,mpi_comm_cols)
call herm_matrix_allreduce_&
&PRECISION &
(n_cols,vav,nbw,nbw,mpi_comm_cols)
endif
! U = U - 0.5 * V * VAV
......
subroutine herm_matrix_allreduce_PRECISION(n,a,lda,ldb,comm)
subroutine herm_matrix_allreduce_&
&PRECISION &
(n,a,lda,ldb,comm)
!-------------------------------------------------------------------------------
! herm_matrix_allreduce: Does an mpi_allreduce for a hermitian matrix A.
! On entry, only the upper half of A needs to be set
! On exit, the complete matrix is set
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
#else
use timings_dummy
#endif
......@@ -60,6 +62,7 @@
call timer%stop("herm_matrix_allreduce" // PRECISION_SUFFIX)
end subroutine herm_matrix_allreduce_PRECISION
end subroutine herm_matrix_allreduce_&
&PRECISION
subroutine symm_matrix_allreduce_PRECISION(n,a,lda,ldb,comm)
subroutine symm_matrix_allreduce_&
&PRECISION &
(n,a,lda,ldb,comm)
!-------------------------------------------------------------------------------
! symm_matrix_allreduce: Does an mpi_allreduce for a symmetric matrix A.
! On entry, only the upper half of A needs to be set
......@@ -60,6 +62,8 @@
call timer%stop("symm_matrix_allreduce" // PRECISION_SUFFIX)
end subroutine symm_matrix_allreduce_PRECISION
end subroutine symm_matrix_allreduce_&
&PRECISION
......@@ -107,14 +107,14 @@
#else
real(kind=REAL_DATATYPE), intent(in) :: a(lda,matrixCols)
#endif
#endif
#endif /* REALCASE */
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE),intent(in) :: a(lda,*)
#else
complex(kind=COMPLEX_DATATYPE), intent(in) :: a(lda,matrixCols)
#endif
#endif
#endif /* COMPLEXCASE */
real(kind=REAL_DATATYPE), intent(out) :: d(na), e(na) ! set only on PE 0
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(out), &
......@@ -510,7 +510,7 @@
#if COMPLEXCASE == 1
startAddr = ubound(hh_trans_complex,dim=2)
#endif
#endif
#endif /* WITH_MPI */
#ifdef WITH_OPENMP
do istep=1,na-1-block_limits(my_pe)*nb
......@@ -543,10 +543,6 @@
if (istep < na-1) then
! Transform first column of remaining matrix
vnorm2 = sum(ab(3:n+1,na_s-n_off)**2)
call hh_transform_real_PRECISION(ab(2,na_s-n_off),vnorm2,hf,tau)
hv(1) = CONST_1_0
hv(2:n) = ab(3:n+1,na_s-n_off)*hf
endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
......@@ -555,11 +551,26 @@
vnorm2 = sum(real(ab(3:n+1,na_s-n_off),kind=rk4)**2+aimag(ab(3:n+1,na_s-n_off))**2)
#endif
if (n<2) vnorm2 = 0. ! Safety only
call hh_transform_complex_PRECISION(ab(2,na_s-n_off),vnorm2,hf,tau)
#endif /* COMPLEXCASE */
#if REALCASE == 1
call hh_transform_real_&
#endif
#if COMPLEXCASE == 1
call hh_transform_complex_&
#endif
&PRECISION &
(ab(2,na_s-n_off),vnorm2,hf,tau)
#if REALCASE == 1
hv(1) = CONST_1_0
hv(2:n) = ab(3:n+1,na_s-n_off)*hf
endif
#endif
#if COMPLEXCASE == 1
hv(1) = CONST_COMPLEX_1_0
hv(2:n) = ab(3:n+1,na_s-n_off)*hf
#endif /* COMPLEXCASE == 1 */
#endif
d(istep) = ab(1,na_s-n_off)
e(istep) = ab(2,na_s-n_off)
if (istep == na-1) then
......@@ -753,9 +764,6 @@
#if REALCASE == 1
vnorm2 = sum(ab(nb+2:nb+nr,ns)**2)
call hh_transform_real_PRECISION(ab(nb+1,ns),vnorm2,hf,tau_t(my_thread))
hv_t(1 ,my_thread) = CONST_1_0
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
......@@ -763,9 +771,21 @@
#else
vnorm2 = sum(real(ab(nb+2:nb+nr,ns))**2+aimag(ab(nb+2:nb+nr,ns))**2)
#endif
#endif /* COMPLEXCASE */
call hh_transform_complex_PRECISION(ab(nb+1,ns),vnorm2,hf,tau_t(my_thread))
#if REALCASE == 1
call hh_transform_real_&
#endif
#if COMPLEXCASE == 1
call hh_transform_complex_&
#endif
&PRECISION &
(ab(nb+1,ns),vnorm2,hf,tau_t(my_thread))
#if REALCASE == 1
hv_t(1 ,my_thread) = CONST_1_0
#endif
#if COMPLEXCASE == 1
hv_t(1 ,my_thread) = CONST_COMPLEX_1_0
#endif
hv_t(2:nr,my_thread) = ab(nb+2:nb+nr,ns)*hf
......@@ -1108,8 +1128,6 @@
if (nr>1) then
#if REALCASE == 1
vnorm2 = sum(ab(nb+2:nb+nr,ns)**2)
call hh_transform_real_PRECISION(ab(nb+1,ns),vnorm2,hf,tau_new)
hv_new(1) = CONST_1_0
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
......@@ -1117,11 +1135,22 @@
#else
vnorm2 = sum(real(ab(nb+2:nb+nr,ns),kind=rk4)**2+aimag(ab(nb+2:nb+nr,ns))**2)
#endif
#endif /* COMPLEXCASE */
call hh_transform_complex_PRECISION(ab(nb+1,ns),vnorm2,hf,tau_new)
#if REALCASE == 1
call hh_transform_real_&
#endif
#if COMPLEXCASE == 1
call hh_transform_complex_&
#endif
&PRECISION &
(ab(nb+1,ns),vnorm2,hf,tau_new)
#if REALCASE == 1
hv_new(1) = CONST_1_0
#endif
#if COMPLEXCASE == 1
hv_new(1) = CONST_COMPLEX_1_0
#endif /* COMPLEXCASE */
#endif
hv_new(2:nr) = ab(nb+2:nb+nr,ns)*hf
#if REALCASE == 1
ab(nb+2:,ns) = CONST_0_0
......@@ -1129,7 +1158,7 @@
#if COMPLEXCASE == 1
ab(nb+2:,ns) = CONST_COMPLEX_0_0
#endif
endif
endif ! nr > 1
! ... and send it away immediatly if this is the last block
......
......@@ -5,13 +5,6 @@
#undef PRECISION_STR
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef qr_pdgeqrf_2dcomm_PRECISION_STR
#undef hh_transform_NUMBER_PRECISION
#undef hh_transform_NUMBER_PRECISION_STR
#undef hh_transform_real_PRECISION
#undef symm_matrix_allreduce_PRECISION
#undef symm_matrix_allreduce_PRECISION_STR
#undef herm_matrix_allreduce_PRECISION
#undef herm_matrix_allreduce_PRECISION_STR
#undef redist_band_NUMBER_PRECISION
#undef redist_band_NUMBER_PRECISION_STR
#undef redist_band_real_PRECISION
......@@ -124,13 +117,6 @@
#define PRECISION_STR 'double'
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_double'
#define hh_transform_NUMBER_PRECISION hh_transform_real_double
#define hh_transform_NUMBER_PRECISION_STR 'hh_transform_real_double'
#define hh_transform_real_PRECISION hh_transform_real_double
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_double
#define symm_matrix_allreduce_PRECISION_STR 'symm_matrix_allreduce_double'
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_double
#define herm_matrix_allreduce_PRECISION_STR 'herm_matrix_allreduce_double'
#define redist_band_NUMBER_PRECISION redist_band_real_double
#define redist_band_NUMBER_PRECISION_STR 'redist_band_real_double'
#define redist_band_real_PRECISION redist_band_real_double
......@@ -244,13 +230,6 @@
#define PRECISION_STR 'single'
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_single'
#define hh_transform_NUMBER_PRECISION hh_transform_real_single
#define hh_transform_NUMBER_PRECISION_STR 'hh_transform_real_single'
#define hh_transform_real_PRECISION hh_transform_real_single
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_single
#define symm_matrix_allreduce_PRECISION_STR 'symm_matrix_allreduce_single'
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_single
#define herm_matrix_allreduce_PRECISION_STR 'herm_matrix_allreduce_single'
#define redist_band_NUMBER_PRECISION redist_band_real_single
#define redist_band_NUMBER_PRECISION_STR 'redist_band_real_single'
#define redist_band_real_PRECISION redist_band_real_single
......@@ -367,13 +346,6 @@
#undef PRECISION_STR
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef qr_pdgeqrf_2dcomm_PRECISION_STR
#undef hh_transform_NUMBER_PRECISION
#undef hh_transform_NUMBER_PRECISION_STR
#undef hh_transform_complex_PRECISION
#undef symm_matrix_allreduce_PRECISION
#undef symm_matrix_allreduce_PRECISION_STR
#undef herm_matrix_allreduce_PRECISION
#undef herm_matrix_allreduce_PRECISION_STR
#undef redist_band_NUMBER_PRECISION
#undef redist_band_NUMBER_PRECISION_STR
#undef redist_band_complex_PRECISION
......@@ -496,13 +468,6 @@
#define PRECISION_STR 'double'
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_double'
#define hh_transform_NUMBER_PRECISION hh_transform_complex_double
#define hh_transform_NUMBER_PRECISION_STR 'hh_transform_complex_double'
#define hh_transform_complex_PRECISION hh_transform_complex_double
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_double
#define symm_matrix_allreduce_PRECISION_STR 'symm_matrix_allreduce_double'
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_double
#define herm_matrix_allreduce_PRECISION_STR 'herm_matrix_allreduce_double'
#define redist_band_NUMBER_PRECISION redist_band_complex_double
#define redist_band_NUMBER_PRECISION_STR 'redist_band_complex_double'
#define redist_band_complex_PRECISION redist_band_complex_double
......@@ -626,13 +591,6 @@
#define PRECISION_STR 'single'
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_single'
#define hh_transform_NUMBER_PRECISION hh_transform_complex_single
#define hh_transform_NUMBER_PRECISION_STR 'hh_transform_complex_single'
#define hh_transform_complex_PRECISION hh_transform_complex_single
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_single
#define symm_matrix_allreduce_PRECISION_STR 'symm_matrix_allreduce_single'
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_single
#define herm_matrix_allreduce_PRECISION_STR 'herm_matrix_allreduce_single'
#define redist_band_NUMBER_PRECISION redist_band_complex_single
#define redist_band_NUMBER_PRECISION_STR 'redist_band_complex_single'
#define redist_band_complex_PRECISION redist_band_complex_single
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment