From b15d27f259260205c2d3c06de3ad4bffdb1c9d1c Mon Sep 17 00:00:00 2001 From: Andreas Marek Date: Tue, 21 Mar 2017 11:43:37 +0100 Subject: [PATCH] Unify pack_unpack_cpu --- Makefile.am | 5 +- src/elpa2_trans_ev_tridi_to_band_template.X90 | 3 +- src/mod_pack_unpack_complex.F90 | 309 ----------------- src/mod_pack_unpack_cpu.F90 | 109 ++++++ src/mod_pack_unpack_real.F90 | 315 ------------------ src/pack_unpack_cpu.X90 | 238 +++++++++++++ src/precision_macros.h | 2 +- 7 files changed, 352 insertions(+), 629 deletions(-) delete mode 100644 src/mod_pack_unpack_complex.F90 create mode 100644 src/mod_pack_unpack_cpu.F90 delete mode 100644 src/mod_pack_unpack_real.F90 create mode 100644 src/pack_unpack_cpu.X90 diff --git a/Makefile.am b/Makefile.am index 64cdd9a3..ded4536e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -27,10 +27,9 @@ libelpa@SUFFIX@_private_la_SOURCES = \ src/mod_mpi.F90 \ src/mod_mpi_stubs.F90 \ src/mod_redist_band.F90 \ - src/mod_pack_unpack_real.F90 \ + src/mod_pack_unpack_cpu.F90 \ src/mod_compute_hh_trafo_real.F90 \ src/mod_compute_hh_trafo_complex.F90 \ - src/mod_pack_unpack_complex.F90 \ src/aligned_mem.F90 \ src/elpa1_compute_private.F90 \ src/elpa2_determine_workload.F90 \ @@ -63,6 +62,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \ src/elpa2_kernels/elpa2_kernels_real_template.X90 \ src/elpa2_kernels/elpa2_kernels_complex_template.X90 \ src/elpa2_kernels/elpa2_kernels_simple_template.X90 \ + src/pack_unpack_cpu.X90 \ src/redist_band.X90 \ src/sanity.X90 \ src/elpa_cholesky_template.X90 \ @@ -981,6 +981,7 @@ EXTRA_DIST = \ src/elpa2_kernels/elpa2_kernels_complex_template.X90 \ src/elpa2_kernels/elpa2_kernels_simple_template.X90 \ src/redist_band.X90 \ + src/pack_unpack_cpu.X90 \ src/sanity.X90 \ src/elpa_cholesky_template.X90 \ src/elpa_invert_trm.X90 \ diff --git a/src/elpa2_trans_ev_tridi_to_band_template.X90 b/src/elpa2_trans_ev_tridi_to_band_template.X90 index 163d3fc2..6243bbad 100644 --- a/src/elpa2_trans_ev_tridi_to_band_template.X90 +++ b/src/elpa2_trans_ev_tridi_to_band_template.X90 @@ -45,14 +45,13 @@ use timings_dummy #endif use elpa2_workload + use pack_unpack_cpu #if REALCASE == 1 - use pack_unpack_real use pack_unpack_real_gpu use compute_hh_trafo_real #endif #if COMPLEXCASE == 1 - use pack_unpack_complex use compute_hh_trafo_complex #endif use cuda_functions diff --git a/src/mod_pack_unpack_complex.F90 b/src/mod_pack_unpack_complex.F90 deleted file mode 100644 index 0e7c66f7..00000000 --- a/src/mod_pack_unpack_complex.F90 +++ /dev/null @@ -1,309 +0,0 @@ -! This file is part of ELPA. -! -! The ELPA library was originally created by the ELPA consortium, -! consisting of the following organizations: -! -! - Max Planck Computing and Data Facility (MPCDF), formerly known as -! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), -! - Bergische Universität Wuppertal, Lehrstuhl für angewandte -! Informatik, -! - Technische Universität München, Lehrstuhl für Informatik mit -! Schwerpunkt Wissenschaftliches Rechnen , -! - Fritz-Haber-Institut, Berlin, Abt. Theorie, -! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, -! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, -! and -! - IBM Deutschland GmbH -! -! -! More information can be found here: -! http://elpa.mpcdf.mpg.de/ -! -! ELPA is free software: you can redistribute it and/or modify -! it under the terms of the version 3 of the license of the -! GNU Lesser General Public License as published by the Free -! Software Foundation. -! -! ELPA is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -! GNU Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public License -! along with ELPA. If not, see -! -! ELPA reflects a substantial effort on the part of the original -! ELPA consortium, and we ask you to respect the spirit of the -! license that we chose: i.e., please contribute any changes you -! may have back to the original ELPA library distribution, and keep -! any derivatives of ELPA under the same license that we chose for -! the original distribution, the GNU Lesser General Public License. -! -! This file was written by A. Marek, MPCDF - -module pack_unpack_complex -#include "config-f90.h" - implicit none - -#ifdef WITH_OPENMP - public pack_row_complex_cpu_openmp_double -#else - public pack_row_complex_cpu_double -#endif - -#ifdef WANT_SINGLE_PRECISION_COMPLEX - -#ifdef WITH_OPENMP - public pack_row_complex_cpu_openmp_single -#else - public pack_row_complex_cpu_single -#endif - -#endif /* WANT_SINGLE_PRECISION_COMPLEX */ - - contains -#ifdef WITH_OPENMP - subroutine pack_row_complex_cpu_openmp_double(a, row, n, stripe_width, stripe_count, max_threads, thread_width, l_nev) -#else - subroutine pack_row_complex_cpu_double(a, row, n, stripe_width, last_stripe_width, stripe_count) -#endif - -#ifdef HAVE_DETAILED_TIMINGS - use timings -#endif - use precision - implicit none -#ifdef WITH_OPENMP - integer(kind=ik), intent(in) :: stripe_width, stripe_count, max_threads, thread_width, l_nev - complex(kind=ck8), intent(in) :: a(:,:,:,:) -#else - integer(kind=ik), intent(in) :: stripe_width, last_stripe_width, stripe_count - complex(kind=ck8), intent(in) :: a(:,:,:) -#endif - complex(kind=ck8) :: row(:) - integer(kind=ik) :: n, i, noff, nl, nt - -#ifdef HAVE_DETAILED_TIMINGS -#ifdef WITH_OPENMP - call timer%start("pack_row_complex_cpu_openmp_double") -#else - call timer%start("pack_row_complex_cpu_double") -#endif -#endif - -#ifdef WITH_OPENMP - do nt = 1, max_threads - do i = 1, stripe_count - noff = (nt-1)*thread_width + (i-1)*stripe_width - nl = min(stripe_width, nt*thread_width-noff, l_nev-noff) - if (nl<=0) exit - row(noff+1:noff+nl) = a(1:nl,n,i,nt) - enddo - enddo -#else - do i=1,stripe_count - nl = merge(stripe_width, last_stripe_width, i +! +! ELPA reflects a substantial effort on the part of the original +! ELPA consortium, and we ask you to respect the spirit of the +! license that we chose: i.e., please contribute any changes you +! may have back to the original ELPA library distribution, and keep +! any derivatives of ELPA under the same license that we chose for +! the original distribution, the GNU Lesser General Public License. +! +! This file was written by A. Marek, MPCDF + +module pack_unpack_cpu +#include "config-f90.h" + implicit none + + private + +#ifdef WITH_OPENMP + public pack_row_real_cpu_openmp_double, unpack_row_real_cpu_openmp_double + public pack_row_complex_cpu_openmp_double, unpack_row_complex_cpu_openmp_double +#else + public pack_row_real_cpu_double, unpack_row_real_cpu_double + public pack_row_complex_cpu_double, unpack_row_complex_cpu_double +#endif + +#ifdef WANT_SINGLE_PRECISION_COMPLEX + +#ifdef WITH_OPENMP + public pack_row_real_cpu_openmp_single, unpack_row_real_cpu_openmp_single + public pack_row_complex_cpu_openmp_single, unpack_row_complex_cpu_openmp_single +#else + public pack_row_real_cpu_single, unpack_row_real_cpu_single + public pack_row_complex_cpu_single, unpack_row_complex_cpu_single +#endif + +#endif /* WANT_SINGLE_PRECISION_COMPLEX */ + + contains + + !real double precision +#define REALCASE 1 +#define DOUBLE_PRECISION 1 +#include "precision_macros.h" +#include "pack_unpack_cpu.X90" +#undef REALCASE +#undef DOUBLE_PRECISION + + ! real single precision +#if defined(WANT_SINGLE_PRECISION_REAL) +#define REALCASE 1 +#define SINGLE_PRECISION 1 +#include "precision_macros.h" +#include "pack_unpack_cpu.X90" +#undef REALCASE +#undef SINGLE_PRECISION +#endif + + !complex double precision +#define COMPLEXCASE 1 +#define DOUBLE_PRECISION 1 +#include "precision_macros.h" +#include "pack_unpack_cpu.X90" +#undef COMPLEXCASE +#undef DOUBLE_PRECISION + + ! complex single precision +#if defined(WANT_SINGLE_PRECISION_COMPLEX) +#define COMPLEXCASE 1 +#define SINGLE_PRECISION 1 +#include "precision_macros.h" +#include "pack_unpack_cpu.X90" +#undef COMPLEXCASE +#undef SINGLE_PRECISION +#endif + + +end module diff --git a/src/mod_pack_unpack_real.F90 b/src/mod_pack_unpack_real.F90 deleted file mode 100644 index c0a5ba66..00000000 --- a/src/mod_pack_unpack_real.F90 +++ /dev/null @@ -1,315 +0,0 @@ -! This file is part of ELPA. -! -! The ELPA library was originally created by the ELPA consortium, -! consisting of the following organizations: -! -! - Max Planck Computing and Data Facility (MPCDF), formerly known as -! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), -! - Bergische Universität Wuppertal, Lehrstuhl für angewandte -! Informatik, -! - Technische Universität München, Lehrstuhl für Informatik mit -! Schwerpunkt Wissenschaftliches Rechnen , -! - Fritz-Haber-Institut, Berlin, Abt. Theorie, -! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, -! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, -! and -! - IBM Deutschland GmbH -! -! -! More information can be found here: -! http://elpa.mpcdf.mpg.de/ -! -! ELPA is free software: you can redistribute it and/or modify -! it under the terms of the version 3 of the license of the -! GNU Lesser General Public License as published by the Free -! Software Foundation. -! -! ELPA is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -! GNU Lesser General Public License for more details. -! -! You should have received a copy of the GNU Lesser General Public License -! along with ELPA. If not, see -! -! ELPA reflects a substantial effort on the part of the original -! ELPA consortium, and we ask you to respect the spirit of the -! license that we chose: i.e., please contribute any changes you -! may have back to the original ELPA library distribution, and keep -! any derivatives of ELPA under the same license that we chose for -! the original distribution, the GNU Lesser General Public License. -! -! This file was written by A. Marek, MPCDF - -module pack_unpack_real -#include "config-f90.h" - implicit none - -#ifdef WITH_OPENMP - public pack_row_real_cpu_openmp_double, unpack_row_real_cpu_openmp_double -#else - public pack_row_real_cpu_double, unpack_row_real_cpu_double -#endif - contains - -#ifdef WITH_OPENMP - subroutine pack_row_real_cpu_openmp_double(a, row, n, stripe_width, stripe_count, max_threads, thread_width, l_nev) -#else - subroutine pack_row_real_cpu_double(a, row, n, stripe_width, last_stripe_width, stripe_count) -#endif - -#ifdef HAVE_DETAILED_TIMINGS - use timings -#endif - use precision - implicit none - integer(kind=ik), intent(in) :: n, stripe_count, stripe_width -#ifdef WITH_OPENMP - integer(kind=ik), intent(in) :: max_threads, thread_width, l_nev - real(kind=rk8), intent(in) :: a(:,:,:,:) -#else - integer(kind=ik), intent(in) :: last_stripe_width - real(kind=rk8), intent(in) :: a(:,:,:) -#endif - real(kind=rk8) :: row(:) - - integer(kind=ik) :: i, noff, nl -#ifdef WITH_OPENMP - integer(kind=ik) :: nt -#endif - -#ifdef HAVE_DETAILED_TIMINGS -#ifdef WITH_OPENMP - call timer%start("pack_row_real_cpu_openmp_double") - -#else - call timer%start("pack_row_real_cpu_double") -#endif -#endif - -#ifdef WITH_OPENMP - do nt = 1, max_threads - do i = 1, stripe_count - noff = (nt-1)*thread_width + (i-1)*stripe_width - nl = min(stripe_width, nt*thread_width-noff, l_nev-noff) - if (nl<=0) exit - row(noff+1:noff+nl) = a(1:nl,n,i,nt) - enddo - enddo -#else - do i=1,stripe_count - nl = merge(stripe_width, last_stripe_width, i +! +! ELPA reflects a substantial effort on the part of the original +! ELPA consortium, and we ask you to respect the spirit of the +! license that we chose: i.e., please contribute any changes you +! may have back to the original ELPA library distribution, and keep +! any derivatives of ELPA under the same license that we chose for +! the original distribution, the GNU Lesser General Public License. +! +! This file was written by A. Marek, MPCDF +#endif + + subroutine pack_row_& + &MATH_DATATYPE& +#ifdef WITH_OPENMP + &_cpu_openmp_& +#else + &_cpu_& +#endif + &PRECISION & + (a, row, n, stripe_width, & +#ifdef WITH_OPENMP + stripe_count, max_threads, thread_width, l_nev) +#else + last_stripe_width, stripe_count) +#endif + +#ifdef HAVE_DETAILED_TIMINGS + use timings +#else + use timings_dummy +#endif + use precision + implicit none + integer(kind=ik), intent(in) :: n, stripe_count, stripe_width +#ifdef WITH_OPENMP + integer(kind=ik), intent(in) :: max_threads, thread_width, l_nev + +#if REALCASE == 1 + real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:) +#endif +#if COMPLEXCASE == 1 + complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:) +#endif + +#else /* WITH_OPENMP */ + integer(kind=ik), intent(in) :: last_stripe_width +#if REALCASE == 1 + real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:) +#endif +#if COMPLEXCASE == 1 + complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:) +#endif + +#endif /* WITH_OPENMP */ + +#if REALCASE == 1 + real(kind=C_DATATYPE_KIND) :: row(:) +#endif +#if COMPLEXCASE == 1 + complex(kind=C_DATATYPE_KIND) :: row(:) +#endif + + integer(kind=ik) :: i, noff, nl +#ifdef WITH_OPENMP + integer(kind=ik) :: nt +#endif + + call timer%start("pack_row_& + &MATH_DATATPE& +#ifdef WITH_OPENMP + &_cpu_openmp" // & +#else + &_cpu" // & +#endif + &PRECISION_SUFFIX & + ) + +#ifdef WITH_OPENMP + do nt = 1, max_threads + do i = 1, stripe_count + noff = (nt-1)*thread_width + (i-1)*stripe_width + nl = min(stripe_width, nt*thread_width-noff, l_nev-noff) + if (nl<=0) exit + row(noff+1:noff+nl) = a(1:nl,n,i,nt) + enddo + enddo +#else + do i=1,stripe_count + nl = merge(stripe_width, last_stripe_width, i