Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
7c19d880
Commit
7c19d880
authored
Nov 03, 2016
by
Pavel Kus
Browse files
single/double unification of elpa1_compute_complex_template.X90
Conflicts: src/elpa1_compute_complex_template.X90
parent
c2037d4e
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
7c19d880
...
@@ -56,7 +56,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
...
@@ -56,7 +56,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/redist_band.X90
\
src/redist_band.X90
\
src/precision_macros.h
src/precision_macros.h
\
src/precision_macros_complex.h
lib_LTLIBRARIES
=
libelpa@SUFFIX@.la
lib_LTLIBRARIES
=
libelpa@SUFFIX@.la
libelpa@SUFFIX@
_la_LINK
=
$(FCLINK)
$(AM_LDFLAGS)
-version-info
$(ELPA_SO_VERSION)
libelpa@SUFFIX@
_la_LINK
=
$(FCLINK)
$(AM_LDFLAGS)
-version-info
$(ELPA_SO_VERSION)
...
@@ -888,6 +889,7 @@ EXTRA_DIST = \
...
@@ -888,6 +889,7 @@ EXTRA_DIST = \
src/elpa2_compute_real_template.X90
\
src/elpa2_compute_real_template.X90
\
src/elpa2_compute_complex_template.X90
\
src/elpa2_compute_complex_template.X90
\
src/precision_macros.h
\
src/precision_macros.h
\
src/precision_macros_complex.h
\
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
...
...
generate/generate_precission.py
0 → 100755
View file @
7c19d880
#!/usr/bin/python
simple_tokens
=
[
"tridiag_complex_PRECISION"
,
"trans_ev_complex_PRECISION"
,
"solve_complex_PRECISION"
,
"hh_transform_complex_PRECISION"
,
"elpa_transpose_vectors_complex_PRECISION"
,
"elpa_reduce_add_vectors_complex_PRECISION"
,
]
blas_tokens
=
[
"PRECISION_GEMV"
,
"PRECISION_TRMV"
,
"PRECISION_GEMM"
,
"PRECISION_TRMM"
,
"PRECISION_HERK"
,
]
explicit_tokens
=
[(
"PRECISION_SUFFIX"
,
"
\"
_double
\"
"
,
"
\"
_single
\"
"
),
(
"MPI_COMPLEX_PRECISION"
,
"MPI_DOUBLE_COMPLEX"
,
"MPI_COMPLEX"
),
(
"MPI_REAL_PRECISION"
,
"MPI_REAL8"
,
"MPI_REAL4"
),
(
"KIND_PRECISION"
,
"rk8"
,
"rk4"
),
(
"PRECISION_CMPLX"
,
"DCMPLX"
,
"CMPLX"
),
(
"PRECISION_IMAG"
,
"DIMAG"
,
"AIMAG"
),
(
"CONST_REAL_0_0"
,
"0.0_rk8"
,
"0.0_rk4"
),
(
"CONST_REAL_1_0"
,
"1.0_rk8"
,
"1.0_rk4"
),
]
print
"#ifdef DOUBLE_PRECISION_COMPLEX"
for
token
in
simple_tokens
:
print
"#define "
,
token
,
token
.
replace
(
"PRECISION"
,
"double"
)
for
token
in
blas_tokens
:
print
"#define "
,
token
,
token
.
replace
(
"PRECISION_"
,
"Z"
)
for
token
in
explicit_tokens
:
print
"#define "
,
token
[
0
],
token
[
1
]
print
"#else"
for
token
in
simple_tokens
:
print
"#undef "
,
token
for
token
in
blas_tokens
:
print
"#undef "
,
token
for
token
in
explicit_tokens
:
print
"#undef "
,
token
[
0
]
for
token
in
simple_tokens
:
print
"#define "
,
token
,
token
.
replace
(
"PRECISION"
,
"single"
)
for
token
in
blas_tokens
:
print
"#define "
,
token
,
token
.
replace
(
"PRECISION_"
,
"C"
)
for
token
in
explicit_tokens
:
print
"#define "
,
token
[
0
],
token
[
2
]
print
"#endif"
src/elpa1_compute_complex_template.X90
View file @
7c19d880
...
@@ -52,11 +52,10 @@
...
@@ -52,11 +52,10 @@
! distributed along with the original code in the file "COPYING".
! distributed along with the original code in the file "COPYING".
#endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
#include "precision_macros_complex.h"
subroutine tridiag_complex_double(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, d, e, tau)
#else
subroutine tridiag_complex_single(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, d, e, tau)
subroutine tridiag_complex_PRECISION(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, d, e, tau)
#endif
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
! tridiag_complex: Reduces a distributed hermitian matrix to tridiagonal form
! tridiag_complex: Reduces a distributed hermitian matrix to tridiagonal form
! (like Scalapack Routine PZHETRD)
! (like Scalapack Routine PZHETRD)
...
@@ -88,6 +87,8 @@
...
@@ -88,6 +87,8 @@
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
#else
use timings_dummy
#endif
#endif
use precision
use precision
implicit none
implicit none
...
@@ -129,13 +130,7 @@
...
@@ -129,13 +130,7 @@
integer(kind=ik) :: istat
integer(kind=ik) :: istat
character(200) :: errorMessage
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("tridiag_complex" // PRECISION_SUFFIX)
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("tridiag_complex_double")
#else
call timer%start("tridiag_complex_single")
#endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
...
@@ -257,13 +252,8 @@
...
@@ -257,13 +252,8 @@
vr(1:l_rows) = a(1:l_rows,l_cols+1)
vr(1:l_rows) = a(1:l_rows,l_cols+1)
if (nstor>0 .and. l_rows>0) then
if (nstor>0 .and. l_rows>0) then
aux(1:2*nstor) = conjg(uvc(l_cols+1,1:2*nstor))
aux(1:2*nstor) = conjg(uvc(l_cols+1,1:2*nstor))
#ifdef DOUBLE_PRECISION_COMPLEX
call PRECISION_GEMV('N', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), &
call ZGEMV('N', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), &
aux, 1, CONE, vr, 1)
#else
call CGEMV('N', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), &
aux, 1, CONE, vr, 1)
aux, 1, CONE, vr, 1)
#endif
endif
endif
if (my_prow==prow(istep-1, nblk, np_rows)) then
if (my_prow==prow(istep-1, nblk, np_rows)) then
...
@@ -277,13 +267,7 @@
...
@@ -277,13 +267,7 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
call mpi_allreduce(aux1, aux2, 2, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(aux1, aux2, 2, MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#else
call mpi_allreduce(aux1, aux2, 2, MPI_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#endif
vnorm2 = aux2(1)
vnorm2 = aux2(1)
vrl = aux2(2)
vrl = aux2(2)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
...
@@ -302,11 +286,7 @@
...
@@ -302,11 +286,7 @@
! vrl = aux2(2)
! vrl = aux2(2)
! Householder transformation
! Householder transformation
#ifdef DOUBLE_PRECISION_COMPLEX
call hh_transform_complex_PRECISION(vrl, vnorm2, xf, tau(istep))
call hh_transform_complex_double(vrl, vnorm2, xf, tau(istep))
#else
call hh_transform_complex_single(vrl, vnorm2, xf, tau(istep))
#endif
! Scale vr and store Householder vector for back transformation
! Scale vr and store Householder vector for back transformation
vr(1:l_rows) = vr(1:l_rows) * xf
vr(1:l_rows) = vr(1:l_rows) * xf
...
@@ -325,12 +305,7 @@
...
@@ -325,12 +305,7 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
call MPI_Bcast(vr, l_rows+1, MPI_COMPLEX_PRECISION, pcol(istep, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Bcast(vr, l_rows+1, MPI_DOUBLE_COMPLEX, pcol(istep, nblk, np_cols), mpi_comm_cols, mpierr)
#else
call MPI_Bcast(vr, l_rows+1, MPI_COMPLEX, pcol(istep, nblk, np_cols), mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -343,15 +318,9 @@
...
@@ -343,15 +318,9 @@
! call elpa_transpose_vectors (vr, 2*ubound(vr,dim=1), mpi_comm_rows, &
! call elpa_transpose_vectors (vr, 2*ubound(vr,dim=1), mpi_comm_rows, &
! vc, 2*ubound(vc,dim=1), mpi_comm_cols, &
! vc, 2*ubound(vc,dim=1), mpi_comm_cols, &
! 1, 2*(istep-1), 1, 2*nblk)
! 1, 2*(istep-1), 1, 2*nblk)
#ifdef DOUBLE_PRECISION_COMPLEX
call elpa_transpose_vectors_complex_PRECISION (vr, ubound(vr,dim=1), mpi_comm_rows, &
call elpa_transpose_vectors_complex_double (vr, ubound(vr,dim=1), mpi_comm_rows, &
vc, ubound(vc,dim=1), mpi_comm_cols, &
vc, ubound(vc,dim=1), mpi_comm_cols, &
1, (istep-1), 1, nblk)
1, (istep-1), 1, nblk)
#else
call elpa_transpose_vectors_complex_single (vr, ubound(vr,dim=1), mpi_comm_rows, &
vc, ubound(vc,dim=1), mpi_comm_cols, &
1, (istep-1), 1, nblk)
#endif
! Calculate u = (A + VU**T + UV**T)*v
! Calculate u = (A + VU**T + UV**T)*v
! For cache efficiency, we use only the upper half of the matrix tiles for this,
! For cache efficiency, we use only the upper half of the matrix tiles for this,
...
@@ -362,14 +331,7 @@
...
@@ -362,14 +331,7 @@
if (l_rows>0 .and. l_cols>0) then
if (l_rows>0 .and. l_cols>0) then
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
call timer%start("OpenMP parallel" // PRECISION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("OpenMP parallel_double")
#else
call timer%start("OpenMP parallel_single")
#endif
#endif
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,lcs,lce,j,lrs,lre)
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,lcs,lce,j,lrs,lre)
...
@@ -392,46 +354,24 @@
...
@@ -392,46 +354,24 @@
if (lre<lrs) cycle
if (lre<lrs) cycle
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
if (mod(n_iter,n_threads) == my_thread) then
if (mod(n_iter,n_threads) == my_thread) then
#ifdef DOUBLE_PRECISION_COMPLEX
call PRECISION_GEMV('C', lre-lrs+1 ,lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc_p(lcs,my_thread), 1)
call ZGEMV('C', lre-lrs+1 ,lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc_p(lcs,my_thread), 1)
if (i/=j) then
if (i/=j) then
call
Z
GEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur_p(lrs,my_thread), 1)
call
PRECISION_
GEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur_p(lrs,my_thread), 1)
endif
endif
#else
call CGEMV('C', lre-lrs+1 ,lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc_p(lcs,my_thread), 1)
if (i/=j) then
call CGEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur_p(lrs,my_thread), 1)
endif
#endif
endif
endif
n_iter = n_iter+1
n_iter = n_iter+1
#else /* WITH_OPENMP */
#else /* WITH_OPENMP */
call PRECISION_GEMV('C', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc(lcs), 1)
#ifdef DOUBLE_PRECISION_COMPLEX
call ZGEMV('C', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc(lcs), 1)
if (i/=j) then
if (i/=j) then
call
Z
GEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur(lrs), 1)
call
PRECISION_
GEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur(lrs), 1)
endif
endif
#else
call CGEMV('C', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc(lcs), 1)
if (i/=j) then
call CGEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur(lrs), 1)
endif
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP */
enddo
enddo
enddo
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
!$OMP END PARALLEL
!$OMP END PARALLEL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%stop("OpenMP parallel_double")
#else
call timer%stop("OpenMP parallel_single")
#endif
#endif
do i=0,max_threads-1
do i=0,max_threads-1
uc(1:l_cols) = uc(1:l_cols) + uc_p(1:l_cols,i)
uc(1:l_cols) = uc(1:l_cols) + uc_p(1:l_cols,i)
...
@@ -440,13 +380,8 @@
...
@@ -440,13 +380,8 @@
#endif
#endif
if (nstor>0) then
if (nstor>0) then
#ifdef DOUBLE_PRECISION_COMPLEX
call PRECISION_GEMV('C', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), vr, 1, CZERO, aux, 1)
call ZGEMV('C', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), vr, 1, CZERO, aux, 1)
call PRECISION_GEMV('N', l_cols, 2*nstor, CONE, uvc, ubound(uvc,dim=1), aux, 1, CONE, uc, 1)
call ZGEMV('N', l_cols, 2*nstor, CONE, uvc, ubound(uvc,dim=1), aux, 1, CONE, uc, 1)
#else
call CGEMV('C', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), vr, 1, CZERO, aux, 1)
call CGEMV('N', l_cols, 2*nstor, CONE, uvc, ubound(uvc,dim=1), aux, 1, CONE, uc, 1)
#endif
endif
endif
endif
endif
...
@@ -457,15 +392,9 @@
...
@@ -457,15 +392,9 @@
! global tile size is smaller than the global remaining matrix
! global tile size is smaller than the global remaining matrix
if (tile_size < istep-1) then
if (tile_size < istep-1) then
#ifdef DOUBLE_PRECISION_COMPLEX
call elpa_reduce_add_vectors_complex_PRECISION (ur, ubound(ur,dim=1), mpi_comm_rows, &
call elpa_reduce_add_vectors_COMPLEX_double (ur, ubound(ur,dim=1), mpi_comm_rows, &
uc, ubound(uc,dim=1), mpi_comm_cols, &
(istep-1), 1, nblk)
#else
call elpa_reduce_add_vectors_COMPLEX_single (ur, ubound(ur,dim=1), mpi_comm_rows, &
uc, ubound(uc,dim=1), mpi_comm_cols, &
uc, ubound(uc,dim=1), mpi_comm_cols, &
(istep-1), 1, nblk)
(istep-1), 1, nblk)
#endif
endif
endif
! Sum up all the uc(:) parts, transpose uc -> ur
! Sum up all the uc(:) parts, transpose uc -> ur
...
@@ -476,12 +405,7 @@
...
@@ -476,12 +405,7 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
call mpi_allreduce(tmp, uc, l_cols, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(tmp, uc, l_cols, MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#else
call mpi_allreduce(tmp, uc, l_cols, MPI_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -494,16 +418,9 @@
...
@@ -494,16 +418,9 @@
! call elpa_transpose_vectors (uc, 2*ubound(uc,dim=1), mpi_comm_cols, &
! call elpa_transpose_vectors (uc, 2*ubound(uc,dim=1), mpi_comm_cols, &
! ur, 2*ubound(ur,dim=1), mpi_comm_rows, &
! ur, 2*ubound(ur,dim=1), mpi_comm_rows, &
! 1, 2*(istep-1), 1, 2*nblk)
! 1, 2*(istep-1), 1, 2*nblk)
#ifdef DOUBLE_PRECISION_COMPLEX
call elpa_transpose_vectors_complex_PRECISION (uc, ubound(uc,dim=1), mpi_comm_cols, &
call elpa_transpose_vectors_complex_double (uc, ubound(uc,dim=1), mpi_comm_cols, &
ur, ubound(ur,dim=1), mpi_comm_rows, &
1, (istep-1), 1, nblk)
#else
call elpa_transpose_vectors_complex_single (uc, ubound(uc,dim=1), mpi_comm_cols, &
ur, ubound(ur,dim=1), mpi_comm_rows, &
ur, ubound(ur,dim=1), mpi_comm_rows, &
1, (istep-1), 1, nblk)
1, (istep-1), 1, nblk)
#endif
! calculate u**T * v (same as v**T * (A + VU**T + UV**T) * v )
! calculate u**T * v (same as v**T * (A + VU**T + UV**T) * v )
...
@@ -513,12 +430,7 @@
...
@@ -513,12 +430,7 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
call mpi_allreduce(xc, vav, 1 , MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(xc, vav, 1 , MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_cols, mpierr)
#else
call mpi_allreduce(xc, vav, 1 , MPI_COMPLEX, MPI_SUM, mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -551,15 +463,9 @@
...
@@ -551,15 +463,9 @@
lrs = 1
lrs = 1
lre = min(l_rows,(i+1)*l_rows_tile)
lre = min(l_rows,(i+1)*l_rows_tile)
if (lce<lcs .or. lre<lrs) cycle
if (lce<lcs .or. lre<lrs) cycle
#ifdef DOUBLE_PRECISION_COMPLEX
call PRECISION_GEMM('N', 'C', lre-lrs+1, lce-lcs+1, 2*nstor, CONE, &
call ZGEMM('N', 'C', lre-lrs+1, lce-lcs+1, 2*nstor, CONE, &
vur(lrs,1), ubound(vur,dim=1), uvc(lcs,1), ubound(uvc,dim=1), &
CONE, a(lrs,lcs), lda)
#else
call CGEMM('N', 'C', lre-lrs+1, lce-lcs+1, 2*nstor, CONE, &
vur(lrs,1), ubound(vur,dim=1), uvc(lcs,1), ubound(uvc,dim=1), &
vur(lrs,1), ubound(vur,dim=1), uvc(lcs,1), ubound(uvc,dim=1), &
CONE, a(lrs,lcs), lda)
CONE, a(lrs,lcs), lda)
#endif
enddo
enddo
nstor = 0
nstor = 0
...
@@ -580,11 +486,7 @@
...
@@ -580,11 +486,7 @@
if (my_prow==prow(1, nblk, np_rows)) then
if (my_prow==prow(1, nblk, np_rows)) then
! We use last l_cols value of loop above
! We use last l_cols value of loop above
vrl = a(1,l_cols)
vrl = a(1,l_cols)
#ifdef DOUBLE_PRECISION_COMPLEX
call hh_transform_complex_PRECISION(vrl, CONST_REAL_0_0, xf, tau(2))
call hh_transform_complex_double(vrl, 0.0_rk8, xf, tau(2))
#else
call hh_transform_complex_single(vrl, 0.0_rk4, xf, tau(2))
#endif
e(1) = vrl
e(1) = vrl
a(1,l_cols) = 1. ! for consistency only
a(1,l_cols) = 1. ! for consistency only
endif
endif
...
@@ -593,12 +495,7 @@
...
@@ -593,12 +495,7 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
call mpi_bcast(tau(2), 1, MPI_COMPLEX_PRECISION, prow(1, nblk, np_rows), mpi_comm_rows, mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_bcast(tau(2), 1, MPI_DOUBLE_COMPLEX, prow(1, nblk, np_rows), mpi_comm_rows, mpierr)
#else
call mpi_bcast(tau(2), 1, MPI_COMPLEX, prow(1, nblk, np_rows), mpi_comm_rows, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -610,12 +507,7 @@
...
@@ -610,12 +507,7 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
call mpi_bcast(tau(2), 1, MPI_COMPLEX_PRECISION, pcol(2, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_bcast(tau(2), 1, MPI_DOUBLE_COMPLEX, pcol(2, nblk, np_cols), mpi_comm_cols, mpierr)
#else
call mpi_bcast(tau(2), 1, MPI_COMPLEX, pcol(2, nblk, np_cols), mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -642,26 +534,14 @@
...
@@ -642,26 +534,14 @@
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL8, MPI_SUM, mpi_comm_rows, mpierr)
tmpr = d
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL8 ,MPI_SUM, mpi_comm_cols, mpierr)
call mpi_allreduce(tmpr, d, na, MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
tmpr = e
call mpi_allreduce(tmpr, e, na, MPI_REAL8, MPI_SUM, mpi_comm_rows, mpierr)
tmpr = e
call mpi_allreduce(tmpr, e, na, MPI_REAL8, MPI_SUM, mpi_comm_cols, mpierr)
#else
tmpr = d
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL4, MPI_SUM, mpi_comm_rows, mpierr)
call mpi_allreduce(tmpr, d, na, MPI_REAL_PRECISION ,MPI_SUM, mpi_comm_cols, mpierr)
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL4 ,MPI_SUM, mpi_comm_cols, mpierr)
tmpr = e
tmpr = e
call mpi_allreduce(tmpr, e, na, MPI_REAL
4
, MPI_SUM, mpi_comm_rows, mpierr)
call mpi_allreduce(tmpr, e, na, MPI_REAL
_PRECISION
, MPI_SUM, mpi_comm_rows, mpierr)
tmpr = e
tmpr = e
call mpi_allreduce(tmpr, e, na, MPI_REAL4, MPI_SUM, mpi_comm_cols, mpierr)
call mpi_allreduce(tmpr, e, na, MPI_REAL_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -673,25 +553,11 @@
...
@@ -673,25 +553,11 @@
stop
stop
endif
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("tridiag_complex" // PRECISION_SUFFIX)
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%stop("tridiag_complex_double")
#else
call timer%stop("tridiag_complex_single")
#endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine tridiag_complex_PRECISION
end subroutine tridiag_complex_double
#else
end subroutine tridiag_complex_single
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine trans_ev_complex_PRECISION(na, nqc, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
subroutine trans_ev_complex_double(na, nqc, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
#else
subroutine trans_ev_complex_single(na, nqc, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
#endif
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
! trans_ev_complex: Transforms the eigenvectors of a tridiagonal matrix back
! trans_ev_complex: Transforms the eigenvectors of a tridiagonal matrix back
! to the eigenvectors of the original matrix
! to the eigenvectors of the original matrix
...
@@ -725,6 +591,8 @@
...
@@ -725,6 +591,8 @@
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
#else
use timings_dummy
#endif
#endif
use precision
use precision
implicit none
implicit none
...
@@ -751,13 +619,8 @@
...
@@ -751,13 +619,8 @@
complex(kind=COMPLEX_DATATYPE), allocatable :: tmat(:,:), h1(:), h2(:)
complex(kind=COMPLEX_DATATYPE), allocatable :: tmat(:,:), h1(:), h2(:)
integer(kind=ik) :: istat
integer(kind=ik) :: istat
character(200) :: errorMessage
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("trans_ev_complex" // PRECISION_SUFFIX)
call timer%start("trans_ev_complex_double")
#else
call timer%start("trans_ev_complex_single")
#endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
call timer%start("mpi_communication")
#endif
#endif
...
@@ -830,11 +693,7 @@
...
@@ -830,11 +693,7 @@
! In the complex case tau(2) /= 0
! In the complex case tau(2) /= 0
if (my_prow == prow(1, nblk, np_rows)) then
if (my_prow == prow(1, nblk, np_rows)) then
#ifdef DOUBLE_PRECISION_COMPLEX
q(1,1:l_cols) = q(1,1:l_cols)*(CONE-tau(2))
q(1,1:l_cols) = q(1,1:l_cols)*((1.0_rk8,0.0_rk8)-tau(2))
#else
q(1,1:l_cols) = q(1,1:l_cols)*((1.0_rk4,0.0_rk4)-tau(2))
#endif
endif
endif
do istep=1,na,nblk
do istep=1,na,nblk
...
@@ -868,11 +727,7 @@
...
@@ -868,11 +727,7 @@
#endif
#endif
if (nb>0) &
if (nb>0) &
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Bcast(hvb, nb, MPI_COMPLEX_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
call MPI_Bcast(hvb, nb, MPI_DOUBLE_COMPLEX, cur_pcol, mpi_comm_cols, mpierr)
#else
call MPI_Bcast(hvb, nb, MPI_COMPLEX, cur_pcol, mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
call timer%stop("mpi_communication")
#endif
#endif
...
@@ -894,11 +749,7 @@
...
@@ -894,11 +749,7 @@
tmat = 0
tmat = 0
if (l_rows>0) &
if (l_rows>0) &
#ifdef DOUBLE_PRECISION_COMPLEX
call PRECISION_HERK('U', 'C', nstor, l_rows, CONE, hvm, ubound(hvm,dim=1), CZERO, tmat, max_stored_rows)
call zherk('U', 'C', nstor, l_rows, CONE, hvm, ubound(hvm,dim=1), CZERO, tmat, max_stored_rows)
#else
call cherk('U', 'C', nstor, l_rows, CONE, hvm, ubound(hvm,dim=1), CZERO, tmat, max_stored_rows)
#endif
nc = 0
nc = 0