Commit d60e24e9 authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'master' of gitta:amarek/ELPA into ELPA_GPU

parents ab3f568f ccc7d1b9
...@@ -11,6 +11,11 @@ If you want to build (or have to since no packages are available) ELPA yourself, ...@@ -11,6 +11,11 @@ If you want to build (or have to since no packages are available) ELPA yourself,
please note that ELPA is shipped with a typical "configure" and "make" please note that ELPA is shipped with a typical "configure" and "make"
procedure. This is the only supported way how to build and install ELPA. procedure. This is the only supported way how to build and install ELPA.
If you obtained ELPA from the official git repository, you will not find
the needed configure script! Please look at the "INSTALL_FROM_GIT_VERSION" file
for documentation on how to proceed.
If --- against our recommendations --- you do not want to install ELPA as If --- against our recommendations --- you do not want to install ELPA as
library, but to include it in your source code you will have to find a solution library, but to include it in your source code you will have to find a solution
by yourself. If you do this anyway, please distribute then all files of ELPA by yourself. If you do this anyway, please distribute then all files of ELPA
......
Welcome to the git-based distribution of the ELPA eigensolver library.
If you are reading this file, you have obtained the ELPA library
through the git repository that hosts the source code and also allows
you to contribute improvements to the project if necessary.
The git version does not contain the necessary build script:
configure, Makefile ...
If you use the git version, you are most likely actively developing
features/improvements for ELPA, and a rebuild of the autotools scripts
will be necessary anyway.
Thus please run "autogen.sh" after your changes, in order to build the
autotools scripts. Note that autoconf version >= 2.69 is needed for
ELPA.
After this step, please proceed as written in the "INSTALL" file.
Welcome to the git-based distribution of the ELPA eigensolver library. If you obtained ELPA via the official git repository please have
a look at the "INSTALL_FROM_GIT_VERSION" file for specific instructions.
If you are reading this file, you have obtained the ELPA library
through the git repository that hosts the source code and also allows
you to contribute improvements to the project if necessary.
In your use of ELPA, please respect the copyright restrictions In your use of ELPA, please respect the copyright restrictions
found below and in the "COPYING" directory in this repository. In a found below and in the "COPYING" directory in this repository. In a
......
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([elpa],[2015.02.002], [elpa-library@rzg.mpg.de]) AC_INIT([elpa],[2015.05.001], [elpa-library@rzg.mpg.de])
AC_SUBST([PACKAGE_VERSION]) AC_SUBST([PACKAGE_VERSION])
AC_CONFIG_SRCDIR([src/elpa1.F90]) AC_CONFIG_SRCDIR([src/elpa1.F90])
...@@ -34,7 +34,7 @@ rm -rf config.h config-f90.h ...@@ -34,7 +34,7 @@ rm -rf config.h config-f90.h
# by the current interface, as they are ABI compatible (e.g. only new symbols # by the current interface, as they are ABI compatible (e.g. only new symbols
# were added by the new interface) # were added by the new interface)
# #
AC_SUBST([ELPA_SO_VERSION], [3:0:0]) AC_SUBST([ELPA_SO_VERSION], [3:1:0])
# #
...@@ -118,6 +118,7 @@ fi ...@@ -118,6 +118,7 @@ fi
dnl variables needed for the tests dnl variables needed for the tests
dnl do NOT remove any variables here, until dnl do NOT remove any variables here, until
dnl 1. you know 100% what you are doing dnl 1. you know 100% what you are doing
dnl 2. you tested ALL configure functionality afterwards dnl 2. you tested ALL configure functionality afterwards
...@@ -126,6 +127,7 @@ dnl Otherwise, you most likely break some functionality ...@@ -126,6 +127,7 @@ dnl Otherwise, you most likely break some functionality
dnl as default always define the generic kernels to be build dnl as default always define the generic kernels to be build
dnl this is only unset if gpu_support_only is defined, or dnl this is only unset if gpu_support_only is defined, or
dnl other specific real/complex kernels are wanted dnl other specific real/complex kernels are wanted
install_real_generic=yes install_real_generic=yes
install_real_generic_simple=yes install_real_generic_simple=yes
...@@ -424,8 +426,6 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ ...@@ -424,8 +426,6 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
) )
AC_MSG_RESULT([${fortran_can_check_environment}]) AC_MSG_RESULT([${fortran_can_check_environment}])
dnl check whether GPU version is requested dnl check whether GPU version is requested
#CUDA_INSTALL_PATH="/usr/local/cuda/" #CUDA_INSTALL_PATH="/usr/local/cuda/"
...@@ -536,7 +536,6 @@ dnl GPU version only ...@@ -536,7 +536,6 @@ dnl GPU version only
m4_include([m4/ax_elpa_gpu_version_only.m4]) m4_include([m4/ax_elpa_gpu_version_only.m4])
DEFINE_OPTION_GPU_SUPPORT_ONLY([gpu-version-only],[gpu-support],[install_gpu]) DEFINE_OPTION_GPU_SUPPORT_ONLY([gpu-version-only],[gpu-support],[install_gpu])
dnl last check whether user wants to compile only a specific kernel dnl last check whether user wants to compile only a specific kernel
dnl dnl
m4_include([m4/ax_elpa_specific_kernels.m4]) m4_include([m4/ax_elpa_specific_kernels.m4])
...@@ -603,6 +602,7 @@ dnl set the conditionals according to the previous tests ...@@ -603,6 +602,7 @@ dnl set the conditionals according to the previous tests
if test x"${can_use_iso_fortran_env}" = x"yes" ; then if test x"${can_use_iso_fortran_env}" = x"yes" ; then
AC_DEFINE([HAVE_ISO_FORTRAN_ENV],[1],[can use module iso_fortran_env]) AC_DEFINE([HAVE_ISO_FORTRAN_ENV],[1],[can use module iso_fortran_env])
fi fi
AM_CONDITIONAL([WITH_GPU_VERSION],[test x"$install_gpu" = x"yes"]) AM_CONDITIONAL([WITH_GPU_VERSION],[test x"$install_gpu" = x"yes"])
if test x"${install_gpu}" = x"yes" ; then if test x"${install_gpu}" = x"yes" ; then
AC_DEFINE([WITH_GPU_VERSION],[1],[enable GPU support]) AC_DEFINE([WITH_GPU_VERSION],[1],[enable GPU support])
......
...@@ -58,8 +58,7 @@ m4_copy([_AX_ELPA_LANG_OPENMP(Fortran 77)], [_AX_ELPA_LANG_OPENMP(Fortran)]) ...@@ -58,8 +58,7 @@ m4_copy([_AX_ELPA_LANG_OPENMP(Fortran 77)], [_AX_ELPA_LANG_OPENMP(Fortran)])
AC_DEFUN([AX_ELPA_OPENMP], AC_DEFUN([AX_ELPA_OPENMP],
[ [
OPENMP_[]_AC_LANG_PREFIX[]FLAGS= OPENMP_[]_AC_LANG_PREFIX[]FLAGS=
AC_ARG_ENABLE([openmp], enable_openmp="yes"
[AS_HELP_STRING([--disable-openmp], [do not use OpenMP])])
if test "$enable_openmp" != no; then if test "$enable_openmp" != no; then
AC_CACHE_CHECK([for _AC_LANG_ABBREV option to support OpenMP], AC_CACHE_CHECK([for _AC_LANG_ABBREV option to support OpenMP],
[ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp], [ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp],
......
This diff is collapsed.
This diff is collapsed.
...@@ -60,12 +60,7 @@ ...@@ -60,12 +60,7 @@
#include <elpa/elpa_kernel_constants.h> #include <elpa/elpa_kernel_constants.h>
module ELPA2_utilities module ELPA2_utilities
use ELPA_utilities
#ifdef HAVE_ISO_FORTRAN_ENV
use iso_fortran_env, only : error_unit
#endif
implicit none implicit none
PRIVATE ! By default, all routines contained are private PRIVATE ! By default, all routines contained are private
...@@ -97,10 +92,6 @@ module ELPA2_utilities ...@@ -97,10 +92,6 @@ module ELPA2_utilities
public :: qr_decomposition_via_environment_variable public :: qr_decomposition_via_environment_variable
#ifndef HAVE_ISO_FORTRAN_ENV
integer, parameter :: error_unit = 6
#endif
integer, parameter :: number_of_real_kernels = ELPA2_NUMBER_OF_REAL_KERNELS integer, parameter :: number_of_real_kernels = ELPA2_NUMBER_OF_REAL_KERNELS
integer, parameter :: REAL_ELPA_KERNEL_GENERIC = ELPA2_REAL_KERNEL_GENERIC integer, parameter :: REAL_ELPA_KERNEL_GENERIC = ELPA2_REAL_KERNEL_GENERIC
integer, parameter :: REAL_ELPA_KERNEL_GENERIC_SIMPLE = ELPA2_REAL_KERNEL_GENERIC_SIMPLE integer, parameter :: REAL_ELPA_KERNEL_GENERIC_SIMPLE = ELPA2_REAL_KERNEL_GENERIC_SIMPLE
......
...@@ -64,21 +64,21 @@ ...@@ -64,21 +64,21 @@
end function end function
!c> int elpa_solve_evp_real_stage1(int na, int nev, int ncols, double *a, int lda, double *ev, double *q, int ldq, int nblk, int mpi_comm_rows, int mpi_comm_cols); !c> int elpa_solve_evp_real_stage1(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
function solve_elpa1_evp_real_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, & function solve_elpa1_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
mpi_comm_rows, mpi_comm_cols) & matrixCols, mpi_comm_rows, mpi_comm_cols) &
result(success) bind(C,name="elpa_solve_evp_real_1stage") result(success) bind(C,name="elpa_solve_evp_real_1stage")
use, intrinsic :: iso_c_binding use, intrinsic :: iso_c_binding
use elpa1, only : solve_evp_real use elpa1, only : solve_evp_real
integer(kind=c_int) :: success integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, mpi_comm_cols, mpi_comm_rows integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows
real(kind=c_double) :: a(1:lda,1:ncols), ev(1:na), q(1:ldq,1:ncols) real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
logical :: successFortran logical :: successFortran
successFortran = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, mpi_comm_rows, mpi_comm_cols) successFortran = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
if (successFortran) then if (successFortran) then
success = 1 success = 1
...@@ -88,22 +88,22 @@ ...@@ -88,22 +88,22 @@
end function end function
! int elpa_solve_evp_complex_stage1(int na, int nev, int ncols double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int mpi_comm_rows, int mpi_comm_cols); ! int elpa_solve_evp_complex_stage1(int na, int nev, double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
function solve_evp_real_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, & function solve_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
mpi_comm_rows, mpi_comm_cols) & matrixCols, mpi_comm_rows, mpi_comm_cols) &
result(success) bind(C,name="elpa_solve_evp_complex_1stage") result(success) bind(C,name="elpa_solve_evp_complex_1stage")
use, intrinsic :: iso_c_binding use, intrinsic :: iso_c_binding
use elpa1, only : solve_evp_complex use elpa1, only : solve_evp_complex
integer(kind=c_int) :: success integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, mpi_comm_cols, mpi_comm_rows integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows
complex(kind=c_double_complex) :: a(1:lda,1:ncols), q(1:ldq,1:ncols) complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_double) :: ev(1:na) real(kind=c_double) :: ev(1:na)
logical :: successFortran logical :: successFortran
successFortran = solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, mpi_comm_rows, mpi_comm_cols) successFortran = solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
if (successFortran) then if (successFortran) then
success = 1 success = 1
...@@ -113,9 +113,9 @@ ...@@ -113,9 +113,9 @@
end function end function
!c> int elpa_solve_evp_real_stage2(int na, int nev, int ncols, double *a, int lda, double *ev, double *q, int ldq, int nblk, int_na_rows, int na_cols, int mpi_comm_rows, int mpi_comm_cols, int THIS_REAL_ELPA_KERNEL_API, int useQR); !c> int elpa_solve_evp_real_stage2(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int THIS_REAL_ELPA_KERNEL_API, int useQR);
function solve_elpa2_evp_real_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, & function solve_elpa2_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
na_rows, na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, & matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQR) & THIS_REAL_ELPA_KERNEL_API, useQR) &
result(success) bind(C,name="elpa_solve_evp_real_2stage") result(success) bind(C,name="elpa_solve_evp_real_2stage")
...@@ -123,10 +123,10 @@ ...@@ -123,10 +123,10 @@
use elpa2, only : solve_evp_real_2stage use elpa2, only : solve_evp_real_2stage
integer(kind=c_int) :: success integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, na_rows, na_cols, mpi_comm_cols, mpi_comm_rows, & integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR
real(kind=c_double) :: a(1:lda,1:ncols), ev(1:na), q(1:ldq,1:ncols) real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
...@@ -138,7 +138,7 @@ ...@@ -138,7 +138,7 @@
useQRFortran = .true. useQRFortran = .true.
endif endif
successFortran = solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, na_rows, na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, & successFortran = solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQRFortran) THIS_REAL_ELPA_KERNEL_API, useQRFortran)
if (successFortran) then if (successFortran) then
...@@ -149,9 +149,9 @@ ...@@ -149,9 +149,9 @@
end function end function
! int elpa_solve_evp_complex_stage2(int na, int nev, int ncols, double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int na_rows, int na_cols, int mpi_comm_rows, int mpi_comm_cols); ! int elpa_solve_evp_complex_stage2(int na, int nev, double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
function solve_elpa2_evp_complex_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, & function solve_elpa2_evp_complex_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
na_rows, na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, & matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API) & THIS_COMPLEX_ELPA_KERNEL_API) &
result(success) bind(C,name="elpa_solve_evp_complex_2stage") result(success) bind(C,name="elpa_solve_evp_complex_2stage")
...@@ -159,14 +159,14 @@ ...@@ -159,14 +159,14 @@
use elpa2, only : solve_evp_complex_2stage use elpa2, only : solve_evp_complex_2stage
integer(kind=c_int) :: success integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, na_rows, na_cols, mpi_comm_cols, mpi_comm_rows, & integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API
complex(kind=c_double_complex) :: a(1:lda,1:ncols), q(1:ldq,1:ncols) complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_double) :: ev(1:na) real(kind=c_double) :: ev(1:na)
logical :: successFortran logical :: successFortran
successFortran = solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, na_rows, na_cols, mpi_comm_rows, mpi_comm_cols, & successFortran = solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API) mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API)
if (successFortran) then if (successFortran) then
......
! Template file: the preprocessor selects the real or complex variant.
! DATATYPE and BYTESIZE are macros substituted by the including file
! (e.g. real*8 or complex*16).
#if REALCASE==1
subroutine elpa_reduce_add_vectors_real(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvr,nvc,nblk)
#endif
#if COMPLEXCASE==1
subroutine elpa_reduce_add_vectors_complex(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvr,nvc,nblk)
#endif
!-------------------------------------------------------------------------------
! This routine does a reduce of all vectors in vmat_s over the communicator comm_t.
! The result of the reduce is gathered on the processors owning the diagonal
! and added to the array of vectors vmat_t (which is distributed over comm_t).
!
! Opposed to elpa_transpose_vectors, there is NO identical copy of vmat_s
! in the different members within vmat_t (else a reduce wouldn't be necessary).
! After this routine, an allreduce of vmat_t has to be done.
!
! vmat_s array of vectors to be reduced and added
! ld_s leading dimension of vmat_s
! comm_s communicator over which vmat_s is distributed
! vmat_t array of vectors to which vmat_s is added
! ld_t leading dimension of vmat_t
! comm_t communicator over which vmat_t is distributed
! nvr global length of vmat_s/vmat_t
! nvc number of columns in vmat_s/vmat_t
! nblk block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
! NOTE(review): least_common_multiple must be provided by the scope that
! includes this template; the "use ELPA1" that would import it is commented out.
! use ELPA1 ! for least_common_multiple
implicit none
include 'mpif.h'
integer, intent(in) :: ld_s, comm_s, ld_t, comm_t, nvr, nvc, nblk
DATATYPE*BYTESIZE, intent(in) :: vmat_s(ld_s,nvc)
DATATYPE*BYTESIZE, intent(inout) :: vmat_t(ld_t,nvc)
DATATYPE*BYTESIZE, allocatable :: aux1(:), aux2(:)
integer myps, mypt, nps, npt
integer n, lc, k, i, ips, ipt, ns, nl, mpierr
integer lcm_s_t, nblks_tot
! Rank and size within both communicators.
call mpi_comm_rank(comm_s,myps,mpierr)
call mpi_comm_size(comm_s,nps ,mpierr)
call mpi_comm_rank(comm_t,mypt,mpierr)
call mpi_comm_size(comm_t,npt ,mpierr)
! Look to elpa_transpose_vectors for the basic idea!
! The communictation pattern repeats in the global matrix after
! the least common multiple of (nps,npt) blocks
lcm_s_t = least_common_multiple(nps,npt) ! least common multiple of nps, npt
nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
! Buffers sized for the worst case: one block per lcm cycle, full nblk rows,
! all nvc columns.
allocate(aux1( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
allocate(aux2( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
! Zero the buffers: the last block may be shorter than nblk (nl < nblk),
! yet the packing below always advances k by a full nblk stride.
aux1(:) = 0
aux2(:) = 0
! Loop over the lcm_s_t distinct (source rank, target rank) pairings.
do n = 0, lcm_s_t-1
ips = mod(n,nps)
ipt = mod(n,npt)
if(myps == ips) then
! Pack all blocks this rank owns in this pairing into aux1,
! column by column, with a fixed nblk stride per block.
k = 0
do lc=1,nvc
do i = n, nblks_tot-1, lcm_s_t
ns = (i/nps)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
aux1(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
k = k+nblk
enddo
enddo
! Reduce (sum) the packed data onto the rank in comm_t that owns
! the diagonal blocks of this pairing.
#if REALCASE==1
if(k>0) call mpi_reduce(aux1,aux2,k,MPI_REAL8,MPI_SUM,ipt,comm_t,mpierr)
#endif
#if COMPLEXCASE==1
if(k>0) call mpi_reduce(aux1,aux2,k,MPI_DOUBLE_COMPLEX,MPI_SUM,ipt,comm_t,mpierr)
#endif
! Only the reduce root unpacks: add the reduced blocks into vmat_t,
! using the target-side block indexing (npt instead of nps).
if(mypt == ipt) then
k = 0
do lc=1,nvc
do i = n, nblks_tot-1, lcm_s_t
ns = (i/npt)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
vmat_t(ns+1:ns+nl,lc) = vmat_t(ns+1:ns+nl,lc) + aux2(k+1:k+nl)
k = k+nblk
enddo
enddo
endif
endif
enddo
deallocate(aux1)
deallocate(aux2)
end subroutine
! Template file: the preprocessor selects the real or complex variant.
! DATATYPE and BYTESIZE are macros substituted by the including file.
#if REALCASE==1
subroutine elpa_transpose_vectors_real(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvs,nvr,nvc,nblk)
#endif
#if COMPLEXCASE==1
subroutine elpa_transpose_vectors_complex(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvs,nvr,nvc,nblk)
#endif
!-------------------------------------------------------------------------------
! This routine transposes an array of vectors which are distributed in
! communicator comm_s into its transposed form distributed in communicator comm_t.
! There must be an identical copy of vmat_s in every communicator comm_s.
! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
!
! vmat_s original array of vectors
! ld_s leading dimension of vmat_s
! comm_s communicator over which vmat_s is distributed
! vmat_t array of vectors in transposed form
! ld_t leading dimension of vmat_t
! comm_t communicator over which vmat_t is distributed
! nvs global index where to start in vmat_s/vmat_t
! Please note: this is kind of a hint, some values before nvs will be
! accessed in vmat_s/put into vmat_t
! nvr global length of vmat_s/vmat_t
! nvc number of columns in vmat_s/vmat_t
! nblk block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
! NOTE(review): least_common_multiple must be provided by the scope that
! includes this template; the "use ELPA1" that would import it is commented out.
! use ELPA1 ! for least_common_multiple
implicit none
include 'mpif.h'
integer, intent(in) :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
DATATYPE*BYTESIZE, intent(in) :: vmat_s(ld_s,nvc)
DATATYPE*BYTESIZE, intent(inout) :: vmat_t(ld_t,nvc)
DATATYPE*BYTESIZE, allocatable :: aux(:)
integer :: myps, mypt, nps, npt
integer :: n, lc, k, i, ips, ipt, ns, nl, mpierr
integer :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
! Rank and size within both communicators.
call mpi_comm_rank(comm_s,myps,mpierr)
call mpi_comm_size(comm_s,nps ,mpierr)
call mpi_comm_rank(comm_t,mypt,mpierr)
call mpi_comm_size(comm_t,npt ,mpierr)
! The basic idea of this routine is that for every block (in the block cyclic
! distribution), the processor within comm_t which owns the diagonal
! broadcasts its values of vmat_s to all processors within comm_t.
! Of course this has not to be done for every block separately, since
! the communictation pattern repeats in the global matrix after
! the least common multiple of (nps,npt) blocks
lcm_s_t = least_common_multiple(nps,npt) ! least common multiple of nps, npt
nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
! Get the number of blocks to be skipped at the begin.
! This must be a multiple of lcm_s_t (else it is getting complicated),
! thus some elements before nvs will be accessed/set.
nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t
! Worst-case buffer: one block per lcm cycle (after the skipped prefix),
! full nblk rows, all nvc columns.
allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
! Loop over the lcm_s_t distinct (source rank, target rank) pairings.
do n = 0, lcm_s_t-1
ips = mod(n,nps)
ipt = mod(n,npt)
if(mypt == ipt) then
nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
! Nothing to communicate in this pairing.
if(nblks_comm==0) cycle
if(myps == ips) then
! Broadcast root packs its blocks into aux with a fixed nblk
! stride per block (last block may be shorter, nl < nblk).
k = 0
do lc=1,nvc
do i = nblks_skip+n, nblks_tot-1, lcm_s_t
ns = (i/nps)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
k = k+nblk
enddo
enddo
endif
! Broadcast the packed blocks from the diagonal owner over comm_s.
#if COMPLEXCASE==1
call MPI_Bcast(aux,nblks_comm*nblk*nvc,MPI_DOUBLE_COMPLEX,ips,comm_s,mpierr)
#endif
#if REALCASE==1
call MPI_Bcast(aux,nblks_comm*nblk*nvc,MPI_REAL8,ips,comm_s,mpierr)
#endif
! All ranks in comm_s unpack into vmat_t, indexing blocks by the
! target-side distribution (npt instead of nps).
k = 0
do lc=1,nvc
do i = nblks_skip+n, nblks_tot-1, lcm_s_t
ns = (i/npt)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
vmat_t(ns+1:ns+nl,lc) = aux(k+1:k+nl)
k = k+nblk
enddo
enddo
endif
enddo
deallocate(aux)
end subroutine
...@@ -60,18 +60,17 @@ ...@@ -60,18 +60,17 @@
module ELPA_utilities module ELPA_utilities
#ifdef HAVE_ISO_FORTRAN_ENV #ifdef HAVE_ISO_FORTRAN_ENV
use iso_fortran_env, only : error_unit use iso_fortran_env, only : error_unit
#endif #endif
implicit none implicit none
PRIVATE ! By default, all routines contained are private private ! By default, all routines contained are private
public :: debug_messages_via_environment_variable, pcol, prow public :: debug_messages_via_environment_variable, pcol, prow, error_unit
#ifndef HAVE_ISO_FORTRAN_ENV #ifndef HAVE_ISO_FORTRAN_ENV
integer, parameter :: error_unit = 6 integer, parameter :: error_unit = 0
#endif #endif
......