Commit 30cdd6e5 authored by Pavel Kus

elpa2_bandred_complex single/double unified

parent 1d452c16
......@@ -19,6 +19,7 @@ simple_tokens = [
"qr_pdgeqrf_2dcomm_PRECISION",
"hh_transform_NUMBER_PRECISION",
"symm_matrix_allreduce_PRECISION",
"herm_matrix_allreduce_PRECISION",
"redist_band_NUMBER_PRECISION",
"unpack_row_NUMBER_cpu_PRECISION",
"unpack_row_NUMBER_cpu_openmp_PRECISION",
......@@ -94,6 +95,7 @@ explicit_tokens_complex = [
("CONST_COMPLEX_PAIR_0_0", "(0.0_rk8,0.0_rk8)", "(0.0_rk4,0.0_rk4)"),
("CONST_COMPLEX_PAIR_1_0", "(1.0_rk8,0.0_rk8)", "(1.0_rk4,0.0_rk4)"),
("CONST_COMPLEX_PAIR_NEGATIVE_1_0", "(-1.0_rk8,0.0_rk8)", "(-1.0_rk4,0.0_rk4)"),
("CONST_COMPLEX_PAIR_NEGATIVE_0_5", "(-0.5_rk8,0.0_rk8)", "(-0.5_rk4,0.0_rk4)"),
("CONST_COMPLEX_0_0", "0.0_ck8", "0.0_ck4"),
("CONST_COMPLEX_1_0", "1.0_ck8", "1.0_ck4"),
("size_of_PRECISION_complex", "size_of_double_complex_datatype", "size_of_single_complex_datatype"),
......
This diff is collapsed.
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine herm_matrix_allreduce_double(n,a,lda,ldb,comm)
#else
subroutine herm_matrix_allreduce_single(n,a,lda,ldb,comm)
#endif
subroutine herm_matrix_allreduce_PRECISION(n,a,lda,ldb,comm)
!-------------------------------------------------------------------------------
! herm_matrix_allreduce: Does an mpi_allreduce for a hermitian matrix A.
! On entry, only the upper half of A needs to be set
! On exit, the complete matrix is set
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
......@@ -20,13 +18,7 @@
integer(kind=ik) :: i, nc, mpierr
complex(kind=COMPLEX_DATATYPE) :: h1(n*n), h2(n*n)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("herm_matrix_allreduce_double")
#else
call timer%start("herm_matrix_allreduce_single")
#endif
#endif
call timer%start("herm_matrix_allreduce" // PRECISION_SUFFIX)
nc = 0
do i=1,n
......@@ -34,18 +26,9 @@
nc = nc+i
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(h1, h2, nc, MPI_DOUBLE_COMPLEX, MPI_SUM, comm, mpierr)
#else
call mpi_allreduce(h1, h2, nc, MPI_COMPLEX, MPI_SUM, comm, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call mpi_allreduce(h1, h2, nc, MPI_COMPLEX_PRECISION, MPI_SUM, comm, mpierr)
call timer%stop("mpi_communication")
#endif
nc = 0
do i=1,n
......@@ -75,18 +58,8 @@
! nc = nc+i
! enddo
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%stop("herm_matrix_allreduce_double")
#else
call timer%stop("herm_matrix_allreduce_single")
#endif
#endif
call timer%stop("herm_matrix_allreduce" // PRECISION_SUFFIX)
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine herm_matrix_allreduce_double
#else
end subroutine herm_matrix_allreduce_single
#endif
end subroutine herm_matrix_allreduce_PRECISION
......@@ -14,6 +14,7 @@
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef hh_transform_complex_PRECISION
#undef symm_matrix_allreduce_PRECISION
#undef herm_matrix_allreduce_PRECISION
#undef redist_band_complex_PRECISION
#undef unpack_row_complex_cpu_PRECISION
#undef unpack_row_complex_cpu_openmp_PRECISION
......@@ -83,6 +84,7 @@
#undef CONST_COMPLEX_PAIR_0_0
#undef CONST_COMPLEX_PAIR_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_0_5
#undef CONST_COMPLEX_0_0
#undef CONST_COMPLEX_1_0
#undef size_of_PRECISION_complex
......@@ -101,6 +103,7 @@
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define hh_transform_complex_PRECISION hh_transform_complex_double
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_double
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_double
#define redist_band_complex_PRECISION redist_band_complex_double
#define unpack_row_complex_cpu_PRECISION unpack_row_complex_cpu_double
#define unpack_row_complex_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_double
......@@ -170,6 +173,7 @@
#define CONST_COMPLEX_PAIR_0_0 (0.0_rk8,0.0_rk8)
#define CONST_COMPLEX_PAIR_1_0 (1.0_rk8,0.0_rk8)
#define CONST_COMPLEX_PAIR_NEGATIVE_1_0 (-1.0_rk8,0.0_rk8)
#define CONST_COMPLEX_PAIR_NEGATIVE_0_5 (-0.5_rk8,0.0_rk8)
#define CONST_COMPLEX_0_0 0.0_ck8
#define CONST_COMPLEX_1_0 1.0_ck8
#define size_of_PRECISION_complex size_of_double_complex_datatype
......@@ -189,6 +193,7 @@
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef hh_transform_complex_PRECISION
#undef symm_matrix_allreduce_PRECISION
#undef herm_matrix_allreduce_PRECISION
#undef redist_band_complex_PRECISION
#undef unpack_row_complex_cpu_PRECISION
#undef unpack_row_complex_cpu_openmp_PRECISION
......@@ -258,6 +263,7 @@
#undef CONST_COMPLEX_PAIR_0_0
#undef CONST_COMPLEX_PAIR_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_0_5
#undef CONST_COMPLEX_0_0
#undef CONST_COMPLEX_1_0
#undef size_of_PRECISION_complex
......@@ -276,6 +282,7 @@
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define hh_transform_complex_PRECISION hh_transform_complex_single
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_single
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_single
#define redist_band_complex_PRECISION redist_band_complex_single
#define unpack_row_complex_cpu_PRECISION unpack_row_complex_cpu_single
#define unpack_row_complex_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_single
......@@ -345,6 +352,7 @@
#define CONST_COMPLEX_PAIR_0_0 (0.0_rk4,0.0_rk4)
#define CONST_COMPLEX_PAIR_1_0 (1.0_rk4,0.0_rk4)
#define CONST_COMPLEX_PAIR_NEGATIVE_1_0 (-1.0_rk4,0.0_rk4)
#define CONST_COMPLEX_PAIR_NEGATIVE_0_5 (-0.5_rk4,0.0_rk4)
#define CONST_COMPLEX_0_0 0.0_ck4
#define CONST_COMPLEX_1_0 1.0_ck4
#define size_of_PRECISION_complex size_of_single_complex_datatype
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment