Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
30cdd6e5
Commit
30cdd6e5
authored
Dec 06, 2016
by
Pavel Kus
Browse files
elpa2_bandred_complex single/double unified
parent
1d452c16
Changes
4
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
generate/generate_precision.py
View file @
30cdd6e5
...
...
@@ -19,6 +19,7 @@ simple_tokens = [
"qr_pdgeqrf_2dcomm_PRECISION"
,
"hh_transform_NUMBER_PRECISION"
,
"symm_matrix_allreduce_PRECISION"
,
"herm_matrix_allreduce_PRECISION"
,
"redist_band_NUMBER_PRECISION"
,
"unpack_row_NUMBER_cpu_PRECISION"
,
"unpack_row_NUMBER_cpu_openmp_PRECISION"
,
...
...
@@ -94,6 +95,7 @@ explicit_tokens_complex = [
(
"CONST_COMPLEX_PAIR_0_0"
,
"(0.0_rk8,0.0_rk8)"
,
"(0.0_rk4,0.0_rk4)"
),
(
"CONST_COMPLEX_PAIR_1_0"
,
"(1.0_rk8,0.0_rk8)"
,
"(1.0_rk4,0.0_rk4)"
),
(
"CONST_COMPLEX_PAIR_NEGATIVE_1_0"
,
"(-1.0_rk8,0.0_rk8)"
,
"(-1.0_rk4,0.0_rk4)"
),
(
"CONST_COMPLEX_PAIR_NEGATIVE_0_5"
,
"(-0.5_rk8,0.0_rk8)"
,
"(-0.5_rk4,0.0_rk4)"
),
(
"CONST_COMPLEX_0_0"
,
"0.0_ck8"
,
"0.0_ck4"
),
(
"CONST_COMPLEX_1_0"
,
"1.0_ck8"
,
"1.0_ck4"
),
(
"size_of_PRECISION_complex"
,
"size_of_double_complex_datatype"
,
"size_of_single_complex_datatype"
),
...
...
src/elpa2_bandred_complex_template.X90
View file @
30cdd6e5
This diff is collapsed.
Click to expand it.
src/elpa2_herm_matrix_allreduce_complex_template.X90
View file @
30cdd6e5
#ifdef DOUBLE_PRECISION_COMPLEX
subroutine herm_matrix_allreduce_double(n,a,lda,ldb,comm)
#else
subroutine herm_matrix_allreduce_single(n,a,lda,ldb,comm)
#endif
subroutine herm_matrix_allreduce_PRECISION(n,a,lda,ldb,comm)
!-------------------------------------------------------------------------------
! herm_matrix_allreduce: Does an mpi_allreduce for a Hermitian matrix A.
! On entry, only the upper half of A needs to be set
! On exit, the complete matrix is set
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
...
...
@@ -20,13 +18,7 @@
integer(kind=ik) :: i, nc, mpierr
complex(kind=COMPLEX_DATATYPE) :: h1(n*n), h2(n*n)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("herm_matrix_allreduce_double")
#else
call timer%start("herm_matrix_allreduce_single")
#endif
#endif
call timer%start("herm_matrix_allreduce" // PRECISION_SUFFIX)
nc = 0
do i=1,n
...
...
@@ -34,18 +26,9 @@
nc = nc+i
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(h1, h2, nc, MPI_DOUBLE_COMPLEX, MPI_SUM, comm, mpierr)
#else
call mpi_allreduce(h1, h2, nc, MPI_COMPLEX, MPI_SUM, comm, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call mpi_allreduce(h1, h2, nc, MPI_COMPLEX_PRECISION, MPI_SUM, comm, mpierr)
call timer%stop("mpi_communication")
#endif
nc = 0
do i=1,n
...
...
@@ -75,18 +58,8 @@
! nc = nc+i
! enddo
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%stop("herm_matrix_allreduce_double")
#else
call timer%stop("herm_matrix_allreduce_single")
#endif
#endif
call timer%stop("herm_matrix_allreduce" // PRECISION_SUFFIX)
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine herm_matrix_allreduce_double
#else
end subroutine herm_matrix_allreduce_single
#endif
end subroutine herm_matrix_allreduce_PRECISION
src/precision_macros_complex.h
View file @
30cdd6e5
...
...
@@ -14,6 +14,7 @@
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef hh_transform_complex_PRECISION
#undef symm_matrix_allreduce_PRECISION
#undef herm_matrix_allreduce_PRECISION
#undef redist_band_complex_PRECISION
#undef unpack_row_complex_cpu_PRECISION
#undef unpack_row_complex_cpu_openmp_PRECISION
...
...
@@ -83,6 +84,7 @@
#undef CONST_COMPLEX_PAIR_0_0
#undef CONST_COMPLEX_PAIR_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_0_5
#undef CONST_COMPLEX_0_0
#undef CONST_COMPLEX_1_0
#undef size_of_PRECISION_complex
...
...
@@ -101,6 +103,7 @@
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define hh_transform_complex_PRECISION hh_transform_complex_double
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_double
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_double
#define redist_band_complex_PRECISION redist_band_complex_double
#define unpack_row_complex_cpu_PRECISION unpack_row_complex_cpu_double
#define unpack_row_complex_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_double
...
...
@@ -170,6 +173,7 @@
#define CONST_COMPLEX_PAIR_0_0 (0.0_rk8,0.0_rk8)
#define CONST_COMPLEX_PAIR_1_0 (1.0_rk8,0.0_rk8)
#define CONST_COMPLEX_PAIR_NEGATIVE_1_0 (-1.0_rk8,0.0_rk8)
#define CONST_COMPLEX_PAIR_NEGATIVE_0_5 (-0.5_rk8,0.0_rk8)
#define CONST_COMPLEX_0_0 0.0_ck8
#define CONST_COMPLEX_1_0 1.0_ck8
#define size_of_PRECISION_complex size_of_double_complex_datatype
...
...
@@ -189,6 +193,7 @@
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef hh_transform_complex_PRECISION
#undef symm_matrix_allreduce_PRECISION
#undef herm_matrix_allreduce_PRECISION
#undef redist_band_complex_PRECISION
#undef unpack_row_complex_cpu_PRECISION
#undef unpack_row_complex_cpu_openmp_PRECISION
...
...
@@ -258,6 +263,7 @@
#undef CONST_COMPLEX_PAIR_0_0
#undef CONST_COMPLEX_PAIR_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_1_0
#undef CONST_COMPLEX_PAIR_NEGATIVE_0_5
#undef CONST_COMPLEX_0_0
#undef CONST_COMPLEX_1_0
#undef size_of_PRECISION_complex
...
...
@@ -276,6 +282,7 @@
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define hh_transform_complex_PRECISION hh_transform_complex_single
#define symm_matrix_allreduce_PRECISION symm_matrix_allreduce_single
#define herm_matrix_allreduce_PRECISION herm_matrix_allreduce_single
#define redist_band_complex_PRECISION redist_band_complex_single
#define unpack_row_complex_cpu_PRECISION unpack_row_complex_cpu_single
#define unpack_row_complex_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_single
...
...
@@ -345,6 +352,7 @@
#define CONST_COMPLEX_PAIR_0_0 (0.0_rk4,0.0_rk4)
#define CONST_COMPLEX_PAIR_1_0 (1.0_rk4,0.0_rk4)
#define CONST_COMPLEX_PAIR_NEGATIVE_1_0 (-1.0_rk4,0.0_rk4)
#define CONST_COMPLEX_PAIR_NEGATIVE_0_5 (-0.5_rk4,0.0_rk4)
#define CONST_COMPLEX_0_0 0.0_ck4
#define CONST_COMPLEX_1_0 1.0_ck4
#define size_of_PRECISION_complex size_of_single_complex_datatype
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment