Commit 4169f747 authored by Andreas Marek

Put redist_band in separate module

parent 3eb49df7
......@@ -26,6 +26,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/mod_precision.F90 \
src/mod_mpi.F90 \
src/mod_mpi_stubs.F90 \
src/mod_redist_band.F90 \
src/mod_pack_unpack_real.F90 \
src/mod_compute_hh_trafo_real.F90 \
src/mod_compute_hh_trafo_complex.F90 \
......@@ -513,6 +514,7 @@ EXTRA_elpa1_complex_invert_trm@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_he
elpa2_test_real@SUFFIX@_SOURCES = test/Fortran/test_real2.F90
elpa2_test_real@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real@SUFFIX@_LDFLAGS = -static
elpa2_test_real@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa2_test_real@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
......
......@@ -1939,27 +1939,27 @@
if (useGPU) then
! this is not needed since a_dev is passed along from one subroutine to the other
successCUDA = cuda_memcpy ( &
#if REALCASE == 1
loc(a), &
#endif
#if COMPLEXCASE == 1
loc(a(1,1)), &
#endif
a_dev, lda*na_cols* &
#if REALCASE == 1
size_of_PRECISION_real, &
#endif
#if COMPLEXCASE ==1
size_of_PRECISION_complex,&
#endif
cudaMemcpyDeviceToHost)
if (.not.(successCUDA)) then
print *,"bandred_&
&MATH_DATATYPE&
&: error in cudaMemcpy"
stop
endif
! successCUDA = cuda_memcpy ( &
!#if REALCASE == 1
! loc(a), &
!#endif
!#if COMPLEXCASE == 1
! loc(a(1,1)), &
!#endif
! a_dev, lda*na_cols* &
!#if REALCASE == 1
! size_of_PRECISION_real, &
!#endif
!#if COMPLEXCASE ==1
! size_of_PRECISION_complex,&
!#endif
! cudaMemcpyDeviceToHost)
! if (.not.(successCUDA)) then
! print *,"bandred_&
! &MATH_DATATYPE&
! &: error in cudaMemcpy"
! stop
! endif
! successCUDA = cuda_free(a_dev)
! if (.not.(successCUDA)) then
......
......@@ -110,95 +110,6 @@ module ELPA2_compute
! 1 for blocked (maxrank: nblk)
contains
! real double precision first
#define DOUBLE_PRECISION_REAL 1
#define REAL_DATATYPE rk8
#define BYTESIZE 8
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_double"
#define PRECISION double
#include "redist_band.X90"
#undef DOUBLE_PRECISION_REAL
#undef REAL_DATATYPE
#undef BYTESIZE
#undef REALCASE
#undef DOUBLE_PRECISION
#undef PRECISION_SUFFIX
#undef PRECISION
! single precision
#ifdef WANT_SINGLE_PRECISION_REAL
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION
#define REAL_DATATYPE rk4
#define BYTESIZE 4
#define REALCASE 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_single"
#undef PRECISION
#define PRECISION single
#include "redist_band.X90"
#undef REAL_DATATYPE
#undef BYTESIZE
#undef REALCASE
#undef PRECISION_SUFFIX
#undef PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */
! double precision
#define DOUBLE_PRECISION_COMPLEX 1
#define COMPLEX_DATATYPE ck8
#define BYTESIZE 16
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_double"
#undef PRECISION
#define PRECISION double
#include "redist_band.X90"
#undef COMPLEX_DATATYPE
#undef BYTESIZE
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#undef DOUBLE_PRECISION_COMPLEX
#undef PRECISION_SUFFIX
#undef PRECISION
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION
#define COMPLEX_DATATYPE ck4
#define COMPLEXCASE 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_single"
#undef PRECISION
#define PRECISION single
#include "redist_band.X90"
#undef COMPLEX_DATATYPE
#undef BYTESIZE
#undef COMPLEXCASE
#undef PRECISION_SUFFIX
#endif /* WANT_SINGLE_PRECISION_COMPLEX */
! real double precision
#define DOUBLE_PRECISION_REAL 1
#define REAL_DATATYPE rk8
......
......@@ -302,6 +302,7 @@
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) "Time " // "bandred_&
&MATH_DATATYPE&
&_&
&PRECISION " // " :",ttt1-ttt0
end if ! matrix not already banded on input
......@@ -323,7 +324,7 @@
&MATH_DATATYPE&
&_&
&PRECISION&
(na, nbw, nblk, a, a_dev, lda, ev, e, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, useGPU)
(na, nbw, nblk, a, a_dev, lda, ev, e, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, do_useGPU)
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
......
......@@ -99,6 +99,7 @@
use elpa2_workload
use precision
use iso_c_binding
use redist
implicit none
logical, intent(in) :: useGPU
......@@ -174,7 +175,6 @@
#ifndef WITH_MPI
integer(kind=ik) :: startAddr
#endif
call timer%start("tridiag_band_&
&MATH_DATATYPE&
&" // &
......
#include "config-f90.h"
!> Module that hosts the band redistribution routines generated from the
!> template file redist_band.X90.
!>
!> The template is included once per (data type, precision) combination.
!> Before each include the preprocessor macros that select the variant are
!> defined, and afterwards every one of them is removed again so that no
!> setting leaks into the next instantiation:
!>   - real,    double precision : always built
!>   - real,    single precision : only with WANT_SINGLE_PRECISION_REAL
!>   - complex, double precision : always built
!>   - complex, single precision : only with WANT_SINGLE_PRECISION_COMPLEX
!>
!> NOTE(review): the exact routine names are assembled inside the template
!> and in precision_macros.h, which are not visible here - presumably
!> redist_band_<datatype>_<precision>; confirm against the template.
module redist
  implicit none
  public

contains

! ---------------------------------------------------------------------------
! real, double precision
! ---------------------------------------------------------------------------
#define DOUBLE_PRECISION_REAL 1
#define REAL_DATATYPE rk8
#define BYTESIZE 8
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_double"
#define PRECISION double
#include "redist_band.X90"
#undef DOUBLE_PRECISION_REAL
#undef REAL_DATATYPE
#undef BYTESIZE
#undef REALCASE
#undef DOUBLE_PRECISION
#undef PRECISION_SUFFIX
#undef PRECISION

! ---------------------------------------------------------------------------
! real, single precision (optional)
! ---------------------------------------------------------------------------
#ifdef WANT_SINGLE_PRECISION_REAL
! make sure no double precision selector is active for this variant
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION
#define REAL_DATATYPE rk4
#define BYTESIZE 4
#define REALCASE 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_single"
#undef PRECISION
#define PRECISION single
#include "redist_band.X90"
#undef REAL_DATATYPE
#undef BYTESIZE
#undef REALCASE
#undef PRECISION_SUFFIX
#undef PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */

! ---------------------------------------------------------------------------
! complex, double precision
! ---------------------------------------------------------------------------
#define DOUBLE_PRECISION_COMPLEX 1
#define COMPLEX_DATATYPE ck8
#define BYTESIZE 16
#define COMPLEXCASE 1
! same value as in the real section, which feeds the same include files
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_double"
#undef PRECISION
#define PRECISION double
#include "redist_band.X90"
#undef COMPLEX_DATATYPE
#undef BYTESIZE
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#undef DOUBLE_PRECISION_COMPLEX
#undef PRECISION_SUFFIX
#undef PRECISION

! ---------------------------------------------------------------------------
! complex, single precision (optional)
! ---------------------------------------------------------------------------
#ifdef WANT_SINGLE_PRECISION_COMPLEX
! make sure no double precision selector is active for this variant
#undef DOUBLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_REAL
#undef DOUBLE_PRECISION
#define COMPLEX_DATATYPE ck4
! half of the double complex size; the other three variants all set this
! macro, and it is removed again below
#define BYTESIZE 8
#define COMPLEXCASE 1
#include "precision_macros.h"
#undef PRECISION_SUFFIX
#define PRECISION_SUFFIX "_single"
#undef PRECISION
#define PRECISION single
#include "redist_band.X90"
#undef COMPLEX_DATATYPE
#undef BYTESIZE
#undef COMPLEXCASE
#undef PRECISION_SUFFIX
! do not let the precision selector leak past the last instantiation
#undef PRECISION
#endif /* WANT_SINGLE_PRECISION_COMPLEX */

end module redist
......@@ -58,14 +58,14 @@ subroutine redist_band_&
#if COMPLEXCASE == 1
c_a, &
#endif
a_dev, lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm, &
a_dev, lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_global, &
#if REALCASE == 1
r_ab, &
r_ab, useGPU)
#endif
#if COMPLEXCASE == 1
c_ab, &
c_ab, useGPU)
#endif
useGPU)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
......@@ -75,10 +75,12 @@ subroutine redist_band_&
use precision
use iso_c_binding
use cuda_functions
use elpa_utilities, only : local_index
use elpa_mpi
implicit none
logical, intent(in) :: useGPU
integer(kind=ik), intent(in) :: lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm
integer(kind=ik), intent(in) :: lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_global
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(in) :: r_a(lda, matrixCols)
#endif
......@@ -89,7 +91,7 @@ subroutine redist_band_&
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(out) :: r_ab(:,:)
#endif
integer(kind=c_intptr_t) :: a_dev
#if COMPLEXCASE == 1
complex(kind=COMPLEX_DATATYPE), intent(out) :: c_ab(:,:)
#endif
......@@ -108,6 +110,8 @@ subroutine redist_band_&
integer(kind=ik) :: nblocks_total, il, jl, l_rows, l_cols, n_off
logical :: successCUDA
integer(kind=c_intptr_t) :: a_dev
call timer%start("redist_band_&
&MATH_DATATYPE&
&" // &
......@@ -123,19 +127,19 @@ subroutine redist_band_&
#if COMPLEXCASE == 1
loc(c_a(1,1)), &
#endif
a_dev, lda*matrixCols* &
int(a_dev,kind=c_size_t), int(lda*matrixCols* &
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
size_of_double_real_datatype, &
size_of_double_real_datatype,kind=c_size_t), &
#else
size_of_single_real_datatype, &
size_of_single_real_datatype,kind=c_size_t), &
#endif
#endif
#if COMPLEXCASE ==1
#ifdef DOUBLE_PRECISION_COMPLEX
size_of_double_complex_datatype,&
size_of_double_complex_datatype,kind=c_size_t), &
#else
size_of_single_complex_datatype,&
size_of_single_complex_datatype,kind=c_size_t), &
#endif
#endif
cudaMemcpyDeviceToHost)
......@@ -148,8 +152,9 @@ subroutine redist_band_&
endif ! useGPU
call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm,my_pe,mpierr)
call mpi_comm_size(mpi_comm,n_pes,mpierr)
print *, mpi_comm_global
call mpi_comm_rank(mpi_comm_global,my_pe,mpierr)
call mpi_comm_size(mpi_comm_global,n_pes,mpierr)
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
......@@ -169,10 +174,10 @@ subroutine redist_band_&
call timer%start("mpi_communication")
#ifdef WITH_OPENMP
global_id_tmp(:,:) = global_id(:,:)
call mpi_allreduce(global_id_tmp, global_id, np_rows*np_cols, mpi_integer, mpi_sum, mpi_comm, mpierr)
call mpi_allreduce(global_id_tmp, global_id, np_rows*np_cols, mpi_integer, mpi_sum, mpi_comm_global, mpierr)
deallocate(global_id_tmp)
#else
call mpi_allreduce(mpi_in_place, global_id, np_rows*np_cols, mpi_integer, mpi_sum, mpi_comm, mpierr)
call mpi_allreduce(mpi_in_place, global_id, np_rows*np_cols, mpi_integer, mpi_sum, mpi_comm_global, mpierr)
#endif
call timer%stop("mpi_communication")
#endif /* WITH_MPI */
......@@ -298,9 +303,9 @@ subroutine redist_band_&
#if REALCASE==1
#ifdef DOUBLE_PRECISION_REAL
call MPI_Alltoallv(r_sbuf, ncnt_s, nstart_s, MPI_REAL8, r_rbuf, ncnt_r, nstart_r, MPI_REAL8, mpi_comm, mpierr)
call MPI_Alltoallv(r_sbuf, ncnt_s, nstart_s, MPI_REAL8, r_rbuf, ncnt_r, nstart_r, MPI_REAL8, mpi_comm_global, mpierr)
#else
call MPI_Alltoallv(r_sbuf, ncnt_s, nstart_s, MPI_REAL4, r_rbuf, ncnt_r, nstart_r, MPI_REAL4, mpi_comm, mpierr)
call MPI_Alltoallv(r_sbuf, ncnt_s, nstart_s, MPI_REAL4, r_rbuf, ncnt_r, nstart_r, MPI_REAL4, mpi_comm_global, mpierr)
#endif
#endif /* REALCASE==1 */
......@@ -308,9 +313,9 @@ subroutine redist_band_&
#if COMPLEXCASE==1
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Alltoallv(c_sbuf, ncnt_s, nstart_s, MPI_COMPLEX16, c_rbuf, ncnt_r, nstart_r, MPI_COMPLEX16, mpi_comm, mpierr)
call MPI_Alltoallv(c_sbuf, ncnt_s, nstart_s, MPI_COMPLEX16, c_rbuf, ncnt_r, nstart_r, MPI_COMPLEX16, mpi_comm_global, mpierr)
#else
call MPI_Alltoallv(c_sbuf, ncnt_s, nstart_s, MPI_COMPLEX, c_rbuf, ncnt_r, nstart_r, MPI_COMPLEX, mpi_comm, mpierr)
call MPI_Alltoallv(c_sbuf, ncnt_s, nstart_s, MPI_COMPLEX, c_rbuf, ncnt_r, nstart_r, MPI_COMPLEX, mpi_comm_global, mpierr)
#endif
#endif /* COMPLEXCASE==1 */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment