Commit eb2e253c authored by Andreas Marek's avatar Andreas Marek
Browse files

Make skew-symmetric enabled by default

parent fd5322d1
...@@ -15,6 +15,7 @@ issues ...@@ -15,6 +15,7 @@ issues
already diagonal which the ELPA algorithms do not support already diagonal which the ELPA algorithms do not support
- BUG FIX in internal test programs: do not consider a residual of 0.0 to be - BUG FIX in internal test programs: do not consider a residual of 0.0 to be
an error an error
- support for skew-symmetric matrices now enabled by default
Changelog for ELPA 2020.11.001 Changelog for ELPA 2020.11.001
......
...@@ -2020,8 +2020,6 @@ fi ...@@ -2020,8 +2020,6 @@ fi
AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"]) AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"])
AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"]) AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"])
#always define SKEWSYMMETRIC for the moment
AC_MSG_CHECKING(whether we should enable skew-symmetric support) AC_MSG_CHECKING(whether we should enable skew-symmetric support)
AC_ARG_ENABLE([skew-symmetric-support], AC_ARG_ENABLE([skew-symmetric-support],
AS_HELP_STRING([--enable-skew-symmetric-support], AS_HELP_STRING([--enable-skew-symmetric-support],
...@@ -2031,7 +2029,7 @@ AC_ARG_ENABLE([skew-symmetric-support], ...@@ -2031,7 +2029,7 @@ AC_ARG_ENABLE([skew-symmetric-support],
else else
enable_skewsymmetric=no enable_skewsymmetric=no
fi], fi],
[enable_skewsymmetric=no]) [enable_skewsymmetric=yes])
AC_MSG_RESULT([${enable_skewsymmetric}]) AC_MSG_RESULT([${enable_skewsymmetric}])
AM_CONDITIONAL([HAVE_SKEWSYMMETRIC],[test x"$enable_skewsymmetric" = x"yes"]) AM_CONDITIONAL([HAVE_SKEWSYMMETRIC],[test x"$enable_skewsymmetric" = x"yes"])
if test x"${enable_skewsymmetric}" = x"yes"; then if test x"${enable_skewsymmetric}" = x"yes"; then
......
...@@ -362,7 +362,8 @@ print(" " + " \\\n ".join([ ...@@ -362,7 +362,8 @@ print(" " + " \\\n ".join([
prec_flag['double']])) prec_flag['double']]))
print("endif") print("endif")
name = "test_skewsymmetric_real_double" name = "validate_skewsymmetric_real_double"
print("if HAVE_SKEWSYMMETRIC")
print("check_SCRIPTS += " + name + "_extended.sh") print("check_SCRIPTS += " + name + "_extended.sh")
print("noinst_PROGRAMS += " + name) print("noinst_PROGRAMS += " + name)
print(name + "_SOURCES = test/Fortran/test_skewsymmetric.F90") print(name + "_SOURCES = test/Fortran/test_skewsymmetric.F90")
...@@ -371,8 +372,10 @@ print(name + "_FCFLAGS = $(test_program_fcflags) \\") ...@@ -371,8 +372,10 @@ print(name + "_FCFLAGS = $(test_program_fcflags) \\")
print(" " + " \\\n ".join([ print(" " + " \\\n ".join([
domain_flag['real'], domain_flag['real'],
prec_flag['double']])) prec_flag['double']]))
print("endif")
name = "test_skewsymmetric_real_single" name = "validate_skewsymmetric_real_single"
print("if HAVE_SKEWSYMMETRIC")
print("if WANT_SINGLE_PRECISION_REAL") print("if WANT_SINGLE_PRECISION_REAL")
print("check_SCRIPTS += " + name + "_extended.sh") print("check_SCRIPTS += " + name + "_extended.sh")
print("noinst_PROGRAMS += " + name) print("noinst_PROGRAMS += " + name)
...@@ -383,6 +386,7 @@ print(" " + " \\\n ".join([ ...@@ -383,6 +386,7 @@ print(" " + " \\\n ".join([
domain_flag['real'], domain_flag['real'],
prec_flag['single']])) prec_flag['single']]))
print("endif") print("endif")
print("endif")
......
...@@ -105,6 +105,16 @@ module elpa1_impl ...@@ -105,6 +105,16 @@ module elpa1_impl
public :: elpa_solve_evp_complex_1stage_single_impl !< Driver routine for complex 1-stage eigenvalue problem public :: elpa_solve_evp_complex_1stage_single_impl !< Driver routine for complex 1-stage eigenvalue problem
#endif #endif
#ifdef HAVE_SKEWSYMMETRIC
public :: elpa_solve_skew_evp_real_1stage_double_impl !< Driver routine for real double-precision 1-stage skew-symmetric eigenvalue problem
#ifdef WANT_SINGLE_PRECISION_REAL
public :: elpa_solve_skew_evp_real_1stage_single_impl !< Driver routine for real single-precision 1-stage skew-symmetric eigenvalue problem
#endif
#endif /* HAVE_SKEWSYMMETRIC */
! imported from elpa1_auxilliary ! imported from elpa1_auxilliary
public :: elpa_mult_at_b_real_double_impl !< Multiply double-precision real matrices A**T * B public :: elpa_mult_at_b_real_double_impl !< Multiply double-precision real matrices A**T * B
...@@ -168,6 +178,7 @@ contains ...@@ -168,6 +178,7 @@ contains
!> \result success !> \result success
#define REALCASE 1 #define REALCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa1_template.F90" #include "elpa1_template.F90"
#undef REALCASE #undef REALCASE
...@@ -205,6 +216,7 @@ contains ...@@ -205,6 +216,7 @@ contains
#define REALCASE 1 #define REALCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa1_template.F90" #include "elpa1_template.F90"
#undef REALCASE #undef REALCASE
...@@ -241,6 +253,7 @@ contains ...@@ -241,6 +253,7 @@ contains
!> \result success !> \result success
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa1_template.F90" #include "elpa1_template.F90"
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
...@@ -280,10 +293,93 @@ contains ...@@ -280,10 +293,93 @@ contains
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define SINGLE_PRECISION #define SINGLE_PRECISION
#undef ACTIVATE_SKEW
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa1_template.F90" #include "elpa1_template.F90"
#undef COMPLEXCASE #undef COMPLEXCASE
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_COMPLEX */ #endif /* WANT_SINGLE_PRECISION_COMPLEX */
#ifdef HAVE_SKEWSYMMETRIC
!> \brief elpa_solve_skew_evp_real_1stage_double_impl: Fortran function to solve the real double-precision skew-symmetric eigenvalue problem with 1-stage solver
!>
!> \details
!> \param obj elpa_t object contains:
!> \param - obj%na Order of matrix
!> \param - obj%nev number of eigenvalues/vectors to be computed
!> The smallest nev eigenvalues/eigenvectors are calculated.
!> \param - obj%local_nrows Leading dimension of a
!> \param - obj%local_ncols local columns of matrix q
!> \param - obj%nblk blocksize of cyclic distribution, must be the same in both directions!
!> \param - obj%mpi_comm_rows MPI communicator for rows
!> \param - obj%mpi_comm_cols MPI communicator for columns
!> \param - obj%mpi_comm_parent MPI communicator for columns
!> \param - obj%gpu use GPU version (1 or 0)
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!>
!> \result success
#define REALCASE 1
#define DOUBLE_PRECISION 1
#define ACTIVATE_SKEW
#include "../general/precision_macros.h"
#include "elpa1_template.F90"
#undef ACTIVATE_SKEW
#undef REALCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_REAL
!> \brief elpa_solve_evp_real_1stage_single_impl: Fortran function to solve the real single-precision eigenvalue problem with 1-stage solver
!> \details
!> \param obj elpa_t object contains:
!> \param - obj%na Order of matrix
!> \param - obj%nev number of eigenvalues/vectors to be computed
!> The smallest nev eigenvalues/eigenvectors are calculated.
!> \param - obj%local_nrows Leading dimension of a
!> \param - obj%local_ncols local columns of matrix q
!> \param - obj%nblk blocksize of cyclic distribution, must be the same in both directions!
!> \param - obj%mpi_comm_rows MPI communicator for rows
!> \param - obj%mpi_comm_cols MPI communicator for columns
!> \param - obj%mpi_comm_parent MPI communicator for columns
!> \param - obj%gpu use GPU version (1 or 0)
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!>
!> \result success
#define REALCASE 1
#define SINGLE_PRECISION 1
#define ACTIVATE_SKEW
#include "../general/precision_macros.h"
#include "elpa1_template.F90"
#undef REALCASE
#undef ACTIVATE_SKEW
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */
#endif /* HAVE_SKEWSYMMETRIC */
end module ELPA1_impl end module ELPA1_impl
...@@ -55,11 +55,19 @@ ...@@ -55,11 +55,19 @@
#include "../general/sanity.F90" #include "../general/sanity.F90"
#include "../general/error_checking.inc" #include "../general/error_checking.inc"
#ifdef ACTIVATE_SKEW
function elpa_solve_skew_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
&_impl (obj, &
#else
function elpa_solve_evp_& function elpa_solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_1stage_& &_1stage_&
&PRECISION& &PRECISION&
&_impl (obj, & &_impl (obj, &
#endif
#ifdef REDISTRIBUTE_MATRIX #ifdef REDISTRIBUTE_MATRIX
aExtern, & aExtern, &
#else #else
...@@ -98,7 +106,7 @@ function elpa_solve_evp_& ...@@ -98,7 +106,7 @@ function elpa_solve_evp_&
MATH_DATATYPE(kind=rck), optional,target,intent(out) :: qExtern(obj%local_nrows,*) MATH_DATATYPE(kind=rck), optional,target,intent(out) :: qExtern(obj%local_nrows,*)
#else #else
MATH_DATATYPE(kind=rck), intent(inout), target :: aExtern(obj%local_nrows,obj%local_ncols) MATH_DATATYPE(kind=rck), intent(inout), target :: aExtern(obj%local_nrows,obj%local_ncols)
#ifdef HAVE_SKEWSYMMETRIC #ifdef ACTIVATE_SKEW
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,2*obj%local_ncols) MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,2*obj%local_ncols)
#else #else
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,obj%local_ncols) MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: qExtern(obj%local_nrows,obj%local_ncols)
...@@ -112,7 +120,7 @@ function elpa_solve_evp_& ...@@ -112,7 +120,7 @@ function elpa_solve_evp_&
MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*) MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*)
#else #else
MATH_DATATYPE(kind=rck), intent(inout), target :: a(obj%local_nrows,obj%local_ncols) MATH_DATATYPE(kind=rck), intent(inout), target :: a(obj%local_nrows,obj%local_ncols)
#ifdef HAVE_SKEWSYMMETRIC #ifdef ACTIVATE_SKEW
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols) MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else #else
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols) MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
...@@ -187,7 +195,11 @@ function elpa_solve_evp_& ...@@ -187,7 +195,11 @@ function elpa_solve_evp_&
logical :: reDistributeMatrix, doRedistributeMatrix logical :: reDistributeMatrix, doRedistributeMatrix
#ifdef ACTIVATE_SKEW
call obj%timer%start("elpa_solve_skew_evp_&
#else
call obj%timer%start("elpa_solve_evp_& call obj%timer%start("elpa_solve_evp_&
#endif
&MATH_DATATYPE& &MATH_DATATYPE&
&_1stage_& &_1stage_&
&PRECISION& &PRECISION&
...@@ -283,7 +295,11 @@ function elpa_solve_evp_& ...@@ -283,7 +295,11 @@ function elpa_solve_evp_&
#ifdef WITH_OPENMP_TRADITIONAL #ifdef WITH_OPENMP_TRADITIONAL
call omp_set_num_threads(omp_threads_caller) call omp_set_num_threads(omp_threads_caller)
#endif #endif
#ifdef ACTIVATE_SKEW
call obj%timer%stop("elpa_solve_skew_evp_&
#else
call obj%timer%stop("elpa_solve_evp_& call obj%timer%stop("elpa_solve_evp_&
#endif
&MATH_DATATYPE& &MATH_DATATYPE&
&_1stage_& &_1stage_&
&PRECISION& &PRECISION&
...@@ -324,13 +340,17 @@ function elpa_solve_evp_& ...@@ -324,13 +340,17 @@ function elpa_solve_evp_&
useGPU = .false. useGPU = .false.
endif endif
call obj%get("is_skewsymmetric",skewsymmetric,error) #ifdef ACTIVATE_SKEW
if (error .ne. ELPA_OK) then !call obj%get("is_skewsymmetric",skewsymmetric,error)
print *,"Problem getting option for skewsymmetric. Aborting..." !if (error .ne. ELPA_OK) then
stop ! print *,"Problem getting option for skewsymmetric. Aborting..."
endif ! stop
!endif
isSkewsymmetric = (skewsymmetric == 1) !isSkewsymmetric = (skewsymmetric == 1)
isSkewsymmetric = .true.
#else
isSkewsymmetric = .false.
#endif
call obj%timer%start("mpi_communication") call obj%timer%start("mpi_communication")
...@@ -457,7 +477,8 @@ function elpa_solve_evp_& ...@@ -457,7 +477,8 @@ function elpa_solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_& &_&
&PRECISION& &PRECISION&
& (obj, na, a, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads) & (obj, na, a, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug, nrThreads, &
isSkewsymmetric)
#ifdef WITH_NVTX #ifdef WITH_NVTX
call nvtxRangePop() call nvtxRangePop()
...@@ -655,7 +676,11 @@ function elpa_solve_evp_& ...@@ -655,7 +676,11 @@ function elpa_solve_evp_&
call blacs_gridexit(blacs_ctxt_) call blacs_gridexit(blacs_ctxt_)
endif endif
#endif /* REDISTRIBUTE_MATRIX */ #endif /* REDISTRIBUTE_MATRIX */
#ifdef ACTIVATE_SKEW
call obj%timer%stop("elpa_solve_skew_evp_&
#else
call obj%timer%stop("elpa_solve_evp_& call obj%timer%stop("elpa_solve_evp_&
#endif
&MATH_DATATYPE& &MATH_DATATYPE&
&_1stage_& &_1stage_&
&PRECISION& &PRECISION&
......
...@@ -96,7 +96,8 @@ subroutine tridiag_& ...@@ -96,7 +96,8 @@ subroutine tridiag_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_& &_&
&PRECISION & &PRECISION &
(obj, na, a_mat, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, d_vec, e_vec, tau, useGPU, wantDebug, max_threads) (obj, na, a_mat, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, d_vec, e_vec, tau, useGPU, wantDebug, max_threads, &
isSkewsymmetric)
use, intrinsic :: iso_c_binding use, intrinsic :: iso_c_binding
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
...@@ -110,8 +111,7 @@ subroutine tridiag_& ...@@ -110,8 +111,7 @@ subroutine tridiag_&
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: na, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols integer(kind=ik), intent(in) :: na, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
logical, intent(in) :: useGPU, wantDebug logical, intent(in) :: useGPU, wantDebug
integer(kind=c_int) :: skewsymmetric logical, intent(in) :: isSkewsymmetric
logical :: isSkewsymmetric
MATH_DATATYPE(kind=rck), intent(out) :: tau(na) MATH_DATATYPE(kind=rck), intent(out) :: tau(na)
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
...@@ -189,13 +189,6 @@ subroutine tridiag_& ...@@ -189,13 +189,6 @@ subroutine tridiag_&
&MATH_DATATYPE &MATH_DATATYPE
logical :: useIntelGPU logical :: useIntelGPU
call obj%get("is_skewsymmetric",skewsymmetric,istat)
if (istat .ne. ELPA_OK) then
print *,"Problem getting option for skewsymmetric settings. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
if(useGPU) then if(useGPU) then
gpuString = "_gpu" gpuString = "_gpu"
else else
......
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
! Author: Andreas Marek, MPCDF
#endif
#include "config-f90.h"
#include "../general/sanity.F90"
subroutine elpa_transpose_vectors_ss_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, vmat_s, ld_s, comm_s, vmat_t, ld_t, comm_t, nvs, nvr, nvc, nblk, nrThreads)
!-------------------------------------------------------------------------------
! This routine transposes an array of vectors which are distributed in
! communicator comm_s into its transposed form distributed in communicator comm_t.
! There must be an identical copy of vmat_s in every communicator comm_s.
! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
!
! vmat_s original array of vectors
! ld_s leading dimension of vmat_s
! comm_s communicator over which vmat_s is distributed
! vmat_t array of vectors in transposed form
! ld_t leading dimension of vmat_t
! comm_t communicator over which vmat_t is distributed
! nvs global index where to start in vmat_s/vmat_t
! Please note: this is kind of a hint, some values before nvs will be
! accessed in vmat_s/put into vmat_t
! nvr global length of vmat_s/vmat_t
! nvc number of columns in vmat_s/vmat_t
! nblk block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP_TRADITIONAL
use omp_lib
#endif
use elpa_mpi
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in) :: vmat_s(ld_s,nvc)
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout):: vmat_t(ld_t,nvc)
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: aux(:)
integer(kind=ik) :: myps, mypt, nps, npt
integer(kind=ik) :: n, lc, k, i, ips, ipt, ns, nl, mpierr
integer(kind=ik) :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
integer(kind=ik) :: auxstride
integer(kind=ik), intent(in) :: nrThreads
integer(kind=ik) :: istat
character(200) :: errorMessage
call obj%timer%start("elpa_transpose_vectors_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(comm_s,myps,mpierr)
call mpi_comm_size(comm_s,nps ,mpierr)
call mpi_comm_rank(comm_t,mypt,mpierr)
call mpi_comm_size(comm_t,npt ,mpierr)
call obj%timer%stop("mpi_communication")
! The basic idea of this routine is that for every block (in the block cyclic
! distribution), the processor within comm_t which owns the diagonal
! broadcasts its values of vmat_s to all processors within comm_t.
! Of course this has not to be done for every block separately, since
! the communictation pattern repeats in the global matrix after
! the least common multiple of (nps,npt) blocks
lcm_s_t = least_common_multiple(nps,npt) ! least common multiple of nps, npt
nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
! Get the number of blocks to be skipped at the begin.
! This must be a multiple of lcm_s_t (else it is getting complicated),
! thus some elements before nvs will be accessed/set.
nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t
allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
check_allocate("elpa_transpose_vectors_ss: aux", istat, errorMessage)
#ifdef WITH_OPENMP_TRADITIONAL
!$omp parallel &
!$omp default(none) &
!$omp private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n) &
!$omp shared(nps, npt, lcm_s_t, mypt, nblk, myps, vmat_t, mpierr, comm_s, &
!$omp& obj, vmat_s, aux, nblks_skip, nblks_tot, nvc, nvr)
#endif
do n = 0, lcm_s_t-1
ips = mod(n,nps)
ipt = mod(n,npt)
if (mypt == ipt) then
nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
auxstride = nblk * nblks_comm
! if(nblks_comm==0) cycle
if (nblks_comm .ne. 0) then
if (myps == ips) then
! k = 0
#ifdef WITH_OPENMP_TRADITIONAL
!$omp do
#endif
do lc=1,nvc
do i = nblks_skip+n, nblks_tot-1, lcm_s_t
k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
ns = (i/nps)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
! k = k+nblk
enddo
enddo
endif
#ifdef WITH_OPENMP_TRADITIONAL
!$omp barrier