Commit b48cf00a authored by Andreas Marek

Start to remove assumed-size arrays

This commit is not ABI compatible, since it changes the interfaces
of some routines.

Also, introduce type checking for the transpose and reduce_add routines.
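For context: an assumed-size dummy array declares its last dimension as '*', so the callee never knows the true column count and no shape checking is possible. Passing the count explicitly (matrixCols / na_cols below) turns these into explicit-shape arrays that an explicit interface can type- and shape-check. A minimal illustrative sketch of the difference, with hypothetical routine names (not the actual ELPA declarations):

! Assumed size: the compiler cannot check the shape of the actual argument,
! and the array cannot be referenced as a whole (a(:,:) is not allowed).
subroutine scale_columns_assumed_size(a, lda, ncols)
   implicit none
   integer, intent(in)   :: lda, ncols
   real*8, intent(inout) :: a(lda,*)
   integer :: j
   do j = 1, ncols
      a(1:lda,j) = 2.0d0 * a(1:lda,j)
   enddo
end subroutine

! Explicit shape: the extent is part of the declaration, so an explicit
! interface lets the compiler verify type and shape at the call site.
subroutine scale_columns_explicit_shape(a, lda, matrixCols)
   implicit none
   integer, intent(in)   :: lda, matrixCols
   real*8, intent(inout) :: a(lda,matrixCols)
   a(:,:) = 2.0d0 * a(:,:)
end subroutine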
parent bf168297
@@ -64,21 +64,21 @@
 end function
-!c> int elpa_solve_evp_real_stage1(int na, int nev, int ncols, double *a, int lda, double *ev, double *q, int ldq, int nblk, int mpi_comm_rows, int mpi_comm_cols);
-function solve_elpa1_evp_real_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, &
-mpi_comm_rows, mpi_comm_cols) &
+!c> int elpa_solve_evp_real_stage1(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
+function solve_elpa1_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
+matrixCols, mpi_comm_rows, mpi_comm_cols) &
 result(success) bind(C,name="elpa_solve_evp_real_1stage")
 use, intrinsic :: iso_c_binding
 use elpa1, only : solve_evp_real
 integer(kind=c_int) :: success
-integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, mpi_comm_cols, mpi_comm_rows
-real(kind=c_double) :: a(1:lda,1:ncols), ev(1:na), q(1:ldq,1:ncols)
+integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows
+real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
 logical :: successFortran
-successFortran = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, mpi_comm_rows, mpi_comm_cols)
+successFortran = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
 if (successFortran) then
 success = 1
@@ -88,22 +88,22 @@
 end function
-! int elpa_solve_evp_complex_stage1(int na, int nev, int ncols double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int mpi_comm_rows, int mpi_comm_cols);
-function solve_evp_real_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, &
-mpi_comm_rows, mpi_comm_cols) &
+! int elpa_solve_evp_complex_stage1(int na, int nev, double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
+function solve_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
+matrixCols, mpi_comm_rows, mpi_comm_cols) &
 result(success) bind(C,name="elpa_solve_evp_complex_1stage")
 use, intrinsic :: iso_c_binding
 use elpa1, only : solve_evp_complex
 integer(kind=c_int) :: success
-integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, mpi_comm_cols, mpi_comm_rows
-complex(kind=c_double_complex) :: a(1:lda,1:ncols), q(1:ldq,1:ncols)
+integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows
+complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
 real(kind=c_double) :: ev(1:na)
 logical :: successFortran
-successFortran = solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, mpi_comm_rows, mpi_comm_cols)
+successFortran = solve_evp_complex(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
 if (successFortran) then
 success = 1
@@ -113,9 +113,9 @@
 end function
-!c> int elpa_solve_evp_real_stage2(int na, int nev, int ncols, double *a, int lda, double *ev, double *q, int ldq, int nblk, int mpi_comm_rows, int mpi_comm_cols, int THIS_REAL_ELPA_KERNEL_API, int useQR);
-function solve_elpa2_evp_real_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, &
-mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
+!c> int elpa_solve_evp_real_stage2(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int THIS_REAL_ELPA_KERNEL_API, int useQR);
+function solve_elpa2_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
+matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
 THIS_REAL_ELPA_KERNEL_API, useQR) &
 result(success) bind(C,name="elpa_solve_evp_real_2stage")
@@ -123,10 +123,10 @@
 use elpa2, only : solve_evp_real_2stage
 integer(kind=c_int) :: success
-integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, mpi_comm_cols, mpi_comm_rows, &
+integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
 mpi_comm_all
 integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR
-real(kind=c_double) :: a(1:lda,1:ncols), ev(1:na), q(1:ldq,1:ncols)
+real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
@@ -138,7 +138,7 @@
 useQRFortran = .true.
 endif
-successFortran = solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
+successFortran = solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
 THIS_REAL_ELPA_KERNEL_API, useQRFortran)
 if (successFortran) then
@@ -149,9 +149,9 @@
 end function
-! int elpa_solve_evp_complex_stage2(int na, int nev, int ncols, double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int mpi_comm_rows, int mpi_comm_cols);
-function solve_elpa2_evp_complex_wrapper(na, nev, ncols, a, lda, ev, q, ldq, nblk, &
-mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
+! int elpa_solve_evp_complex_stage2(int na, int nev, double_complex *a, int lda, double *ev, double_complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
+function solve_elpa2_evp_complex_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
+matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
 THIS_COMPLEX_ELPA_KERNEL_API) &
 result(success) bind(C,name="elpa_solve_evp_complex_2stage")
@@ -159,14 +159,14 @@
 use elpa2, only : solve_evp_complex_2stage
 integer(kind=c_int) :: success
-integer(kind=c_int), value, intent(in) :: na, nev, ncols, lda, ldq, nblk, mpi_comm_cols, mpi_comm_rows, &
+integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
 mpi_comm_all
 integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API
-complex(kind=c_double_complex) :: a(1:lda,1:ncols), q(1:ldq,1:ncols)
+complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
 real(kind=c_double) :: ev(1:na)
 logical :: successFortran
-successFortran = solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, mpi_comm_rows, mpi_comm_cols, &
+successFortran = solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, &
 mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API)
 if (successFortran) then
#if REALCASE==1
subroutine elpa_reduce_add_vectors_real(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvr,nvc,nblk)
#endif
#if COMPLEXCASE==1
subroutine elpa_reduce_add_vectors_complex(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvr,nvc,nblk)
#endif
!-------------------------------------------------------------------------------
! This routine does a reduce of all vectors in vmat_s over the communicator comm_t.
! The result of the reduce is gathered on the processors owning the diagonal
! and added to the array of vectors vmat_t (which is distributed over comm_t).
!
! As opposed to elpa_transpose_vectors, there is NO identical copy of vmat_s
! in the different members within vmat_t (else a reduce wouldn't be necessary).
! After this routine, an allreduce of vmat_t has to be done.
!
! vmat_s array of vectors to be reduced and added
! ld_s leading dimension of vmat_s
! comm_s communicator over which vmat_s is distributed
! vmat_t array of vectors to which vmat_s is added
! ld_t leading dimension of vmat_t
! comm_t communicator over which vmat_t is distributed
! nvr global length of vmat_s/vmat_t
! nvc number of columns in vmat_s/vmat_t
! nblk block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
! use ELPA1 ! for least_common_multiple
implicit none
include 'mpif.h'
integer, intent(in) :: ld_s, comm_s, ld_t, comm_t, nvr, nvc, nblk
DATATYPE*BYTESIZE, intent(in) :: vmat_s(ld_s,nvc)
DATATYPE*BYTESIZE, intent(inout) :: vmat_t(ld_t,nvc)
DATATYPE*BYTESIZE, allocatable :: aux1(:), aux2(:)
integer :: myps, mypt, nps, npt
integer :: n, lc, k, i, ips, ipt, ns, nl, mpierr
integer :: lcm_s_t, nblks_tot
call mpi_comm_rank(comm_s,myps,mpierr)
call mpi_comm_size(comm_s,nps ,mpierr)
call mpi_comm_rank(comm_t,mypt,mpierr)
call mpi_comm_size(comm_t,npt ,mpierr)
! See elpa_transpose_vectors for the basic idea!
! The communication pattern repeats in the global matrix after
! the least common multiple of (nps,npt) blocks
lcm_s_t = least_common_multiple(nps,npt) ! least common multiple of nps, npt
nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
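! Note: each pass of the n-loop below touches only every lcm_s_t-th block,
! i.e. at most (nblks_tot+lcm_s_t-1)/lcm_s_t = ceil(nblks_tot/lcm_s_t)
! blocks of nblk rows per column, which gives the aux buffer sizes below.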
allocate(aux1( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
allocate(aux2( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
aux1(:) = 0
aux2(:) = 0
do n = 0, lcm_s_t-1
ips = mod(n,nps)
ipt = mod(n,npt)
if(myps == ips) then
k = 0
do lc=1,nvc
do i = n, nblks_tot-1, lcm_s_t
ns = (i/nps)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
aux1(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
k = k+nblk
enddo
enddo
#if REALCASE==1
if(k>0) call mpi_reduce(aux1,aux2,k,MPI_REAL8,MPI_SUM,ipt,comm_t,mpierr)
#endif
#if COMPLEXCASE==1
if(k>0) call mpi_reduce(aux1,aux2,k,MPI_DOUBLE_COMPLEX,MPI_SUM,ipt,comm_t,mpierr)
#endif
if(mypt == ipt) then
k = 0
do lc=1,nvc
do i = n, nblks_tot-1, lcm_s_t
ns = (i/npt)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
vmat_t(ns+1:ns+nl,lc) = vmat_t(ns+1:ns+nl,lc) + aux2(k+1:k+nl)
k = k+nblk
enddo
enddo
endif
endif
enddo
deallocate(aux1)
deallocate(aux2)
end subroutine
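! The helper least_common_multiple comes from ELPA1 (see the commented-out
! "use ELPA1" above). A minimal sketch of such a helper, assuming the usual
! lcm(a,b) = a*b/gcd(a,b) identity; illustrative only, not necessarily the
! ELPA1 implementation:
integer function least_common_multiple_sketch(a, b)
   implicit none
   integer, intent(in) :: a, b
   integer :: x, y, t
   ! Euclid's algorithm for the greatest common divisor
   x = a
   y = b
   do while (y /= 0)
      t = mod(x, y)
      x = y
      y = t
   enddo
   ! x now holds gcd(a,b); divide first to avoid overflow
   least_common_multiple_sketch = (a/x) * b
end function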
#if REALCASE==1
subroutine elpa_transpose_vectors_real(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvs,nvr,nvc,nblk)
#endif
#if COMPLEXCASE==1
subroutine elpa_transpose_vectors_complex(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvs,nvr,nvc,nblk)
#endif
!-------------------------------------------------------------------------------
! This routine transposes an array of vectors which are distributed in
! communicator comm_s into its transposed form distributed in communicator comm_t.
! There must be an identical copy of vmat_s in every communicator comm_s.
! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
!
! vmat_s original array of vectors
! ld_s leading dimension of vmat_s
! comm_s communicator over which vmat_s is distributed
! vmat_t array of vectors in transposed form
! ld_t leading dimension of vmat_t
! comm_t communicator over which vmat_t is distributed
! nvs global index where to start in vmat_s/vmat_t
! Please note: this is kind of a hint, some values before nvs will be
! accessed in vmat_s/put into vmat_t
! nvr global length of vmat_s/vmat_t
! nvc number of columns in vmat_s/vmat_t
! nblk block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
! use ELPA1 ! for least_common_multiple
implicit none
include 'mpif.h'
integer, intent(in) :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
DATATYPE*BYTESIZE, intent(in) :: vmat_s(ld_s,nvc)
DATATYPE*BYTESIZE, intent(inout) :: vmat_t(ld_t,nvc)
DATATYPE*BYTESIZE, allocatable :: aux(:)
integer :: myps, mypt, nps, npt
integer :: n, lc, k, i, ips, ipt, ns, nl, mpierr
integer :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
call mpi_comm_rank(comm_s,myps,mpierr)
call mpi_comm_size(comm_s,nps ,mpierr)
call mpi_comm_rank(comm_t,mypt,mpierr)
call mpi_comm_size(comm_t,npt ,mpierr)
! The basic idea of this routine is that for every block (in the block cyclic
! distribution), the processor within comm_t which owns the diagonal
! broadcasts its values of vmat_s to all processors within comm_t.
! Of course this need not be done for every block separately, since
! the communication pattern repeats in the global matrix after
! the least common multiple of (nps,npt) blocks
lcm_s_t = least_common_multiple(nps,npt) ! least common multiple of nps, npt
nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
! Get the number of blocks to be skipped at the beginning.
! This must be a multiple of lcm_s_t (else it gets complicated),
! thus some elements before nvs will be accessed/set.
nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t
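! (The integer division above rounds down, so nblks_skip is the largest
! multiple of lcm_s_t not exceeding the index of the block that contains
! global element nvs.)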
allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
do n = 0, lcm_s_t-1
ips = mod(n,nps)
ipt = mod(n,npt)
if(mypt == ipt) then
nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
if(nblks_comm==0) cycle
if(myps == ips) then
k = 0
do lc=1,nvc
do i = nblks_skip+n, nblks_tot-1, lcm_s_t
ns = (i/nps)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
k = k+nblk
enddo
enddo
endif
#if COMPLEXCASE==1
call MPI_Bcast(aux,nblks_comm*nblk*nvc,MPI_DOUBLE_COMPLEX,ips,comm_s,mpierr)
#endif
#if REALCASE==1
call MPI_Bcast(aux,nblks_comm*nblk*nvc,MPI_REAL8,ips,comm_s,mpierr)
#endif
k = 0
do lc=1,nvc
do i = nblks_skip+n, nblks_tot-1, lcm_s_t
ns = (i/npt)*nblk ! local start of block i
nl = min(nvr-i*nblk,nblk) ! length
vmat_t(ns+1:ns+nl,lc) = aux(k+1:k+nl)
k = k+nblk
enddo
enddo
endif
enddo
deallocate(aux)
end subroutine
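! The index arithmetic above assumes the standard block-cyclic layout:
! global block i is owned by process mod(i,np) and starts at local offset
! (i/np)*nblk on that process. A tiny hypothetical stand-alone demo of this
! mapping (not part of ELPA):
program block_cyclic_demo
   implicit none
   integer, parameter :: nblk = 2, np = 3, nblks_tot = 7
   integer :: i
   do i = 0, nblks_tot-1
      print '(a,i2,a,i2,a,i3)', 'global block ', i, ' -> process ', &
            mod(i,np), ', local start ', (i/np)*nblk
   enddo
end program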
@@ -45,15 +45,15 @@ module from_c
 public
 interface
-integer(kind=c_int) function elpa1_real_c(na, nev, ncols, a, lda, ev, q, ldq, &
-nblk, mpi_comm_rows, mpi_comm_cols ) &
+integer(kind=c_int) function elpa1_real_c(na, nev, a, lda, ev, q, ldq, &
+nblk, matrixCols, mpi_comm_rows, mpi_comm_cols ) &
 bind(C, name="call_elpa1_real_solver_from_c")
 use iso_c_binding
 implicit none
-integer(kind=c_int), value :: na, nev, ncols, lda, ldq, nblk, mpi_comm_rows, mpi_comm_cols
-real(kind=c_double) :: a(1:lda,1:ncols), ev(1:na), q(1:ldq,1:ncols)
+integer(kind=c_int), value :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
+real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
 end function elpa1_real_c
@@ -73,21 +73,21 @@ module from_c
 contains
-function solve_elpa1_real_call_from_c(na, nev, ncols, a, lda, ev, q, ldq, &
-nblk, mpi_comm_rows, mpi_comm_cols ) &
+function solve_elpa1_real_call_from_c(na, nev, a, lda, ev, q, ldq, &
+nblk, matrixCols, mpi_comm_rows, mpi_comm_cols ) &
 result(success)
 use iso_c_binding
 implicit none
-integer :: na, nev, ncols, lda, ldq, nblk, mpi_comm_rows, mpi_comm_cols
+integer :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
 logical :: success
 integer :: successC
-real*8 :: a(1:lda,1:ncols), ev(1:na), q(1:ldq,1:ncols)
+real*8 :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
-successC = elpa1_real_c(na, nev, ncols, a, lda, ev, q, ldq, nblk, &
-mpi_comm_rows, mpi_comm_cols)
+successC = elpa1_real_c(na, nev, a, lda, ev, q, ldq, nblk, &
+matrixCols, mpi_comm_rows, mpi_comm_cols)
 if (successC .eq. 1) then
 success = .true.
@@ -277,7 +277,7 @@ program test_complex
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_complex(na, nev, a, na_rows, ev, z, na_rows, nblk, &
-mpi_comm_rows, mpi_comm_cols)
+na_cols, mpi_comm_rows, mpi_comm_cols)
 if (.not.(success)) then
 write(error_unit,*) "solve_evp_complex produced an error! Aborting..."
@@ -308,6 +308,7 @@ program test_complex2
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_complex_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
 if (.not.(success)) then
@@ -310,6 +310,7 @@ program test_complex2
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_complex_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
 COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE)
@@ -311,6 +311,7 @@ program test_complex2
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_complex_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
 if (.not.(success)) then
@@ -278,7 +278,7 @@ program test_real
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_real(na, nev, a, na_rows, ev, z, na_rows, nblk, &
-mpi_comm_rows, mpi_comm_cols)
+na_cols, mpi_comm_rows, mpi_comm_cols)
 if (.not.(success)) then
 write(error_unit,*) "solve_evp_real produced an error! Aborting..."
@@ -307,7 +307,7 @@ program test_real2
 end if
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
-success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
 if (.not.(success)) then
@@ -299,6 +299,7 @@ program test_real2
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
 REAL_ELPA_KERNEL_GENERIC_SIMPLE)
@@ -299,6 +299,7 @@ program test_real2
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
 if (.not.(success)) then
@@ -310,6 +310,7 @@ program test_real2
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
+na_cols, &
 mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
 useQR=.true.)
@@ -176,7 +176,7 @@ main(int argc, char** argv) {
 mpierr = MPI_Barrier(MPI_COMM_WORLD);
-success = elpa_solve_evp_real_1stage(na, nev, na_cols, a, na_rows, ev, z, na_rows, nblk, mpi_comm_rows, mpi_comm_cols);
+success = elpa_solve_evp_real_1stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
 if (success != 1) {
 printf("error in ELPA solve \n");
@@ -300,7 +300,7 @@ program test_real
 call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
 success = solve_evp_real(na, nev, a, na_rows, ev, z, na_rows, nblk, &
-mpi_comm_rows, mpi_comm_cols)
+na_cols, mpi_comm_rows, mpi_comm_cols)
 if (.not.(success)) then
 write(error_unit,*) "solve_evp_real produced an error! Aborting..."
@@ -335,8 +335,8 @@ program test_real
 print *," "
 end if
-success = solve_elpa1_real_call_from_c(na, nev, na_cols, aFromC, na_rows, evFromC, zFromC, na_rows, nblk, &
-mpi_comm_rows_fromC, mpi_comm_cols_fromC )
+success = solve_elpa1_real_call_from_c(na, nev, aFromC, na_rows, evFromC, zFromC, na_rows, nblk, &
+na_cols, mpi_comm_rows_fromC, mpi_comm_cols_fromC )
 if (myid==0) then
 print *," "