There is a maintenance of MPCDF Gitlab on Thursday, April 22nd 2020, 9:00 am CEST - Expect some service interruptions during this time

Commit 9e3f0636 authored by Andreas Marek's avatar Andreas Marek

New feature "bandwidth"

Setting a bandwidth of the input matrix via

elpa%set("bandwidth",bw)

tells ELPA 2stage that the matrix is already a banded matrix
with bandwidth bw. Then some compute steps can be skipped.
parent a59bc1fd
...@@ -417,6 +417,8 @@ noinst_PROGRAMS = \ ...@@ -417,6 +417,8 @@ noinst_PROGRAMS = \
test_real_double_2stage \ test_real_double_2stage \
test_complex_double_1stage \ test_complex_double_1stage \
test_complex_double_2stage \ test_complex_double_2stage \
elpa_test_new_interface_real_2stage_banded@SUFFIX@ \
elpa_test_new_interface_complex_2stage_banded@SUFFIX@ \
elpa_test_new_interface_real_2stage@SUFFIX@ \ elpa_test_new_interface_real_2stage@SUFFIX@ \
elpa_test_new_interface_complex_2stage@SUFFIX@ \ elpa_test_new_interface_complex_2stage@SUFFIX@ \
elpa_test_new_interface_real_1stage@SUFFIX@ \ elpa_test_new_interface_real_1stage@SUFFIX@ \
...@@ -426,6 +428,7 @@ if WANT_SINGLE_PRECISION_REAL ...@@ -426,6 +428,7 @@ if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += \ noinst_PROGRAMS += \
test_real_single_1stage \ test_real_single_1stage \
test_real_single_2stage \ test_real_single_2stage \
elpa_test_new_interface_real_single_2stage_banded@SUFFIX@ \
elpa_test_new_interface_real_single_2stage@SUFFIX@ \ elpa_test_new_interface_real_single_2stage@SUFFIX@ \
elpa_test_new_interface_real_single_1stage@SUFFIX@ elpa_test_new_interface_real_single_1stage@SUFFIX@
endif endif
...@@ -434,6 +437,7 @@ if WANT_SINGLE_PRECISION_COMPLEX ...@@ -434,6 +437,7 @@ if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += \ noinst_PROGRAMS += \
test_complex_single_1stage \ test_complex_single_1stage \
test_complex_single_2stage \ test_complex_single_2stage \
elpa_test_new_interface_complex_single_2stage_banded@SUFFIX@ \
elpa_test_new_interface_complex_single_2stage@SUFFIX@ \ elpa_test_new_interface_complex_single_2stage@SUFFIX@ \
elpa_test_new_interface_complex_single_1stage@SUFFIX@ elpa_test_new_interface_complex_single_1stage@SUFFIX@
endif endif
...@@ -467,11 +471,9 @@ noinst_PROGRAMS += \ ...@@ -467,11 +471,9 @@ noinst_PROGRAMS += \
elpa2_test_real_default@SUFFIX@ \ elpa2_test_real_default@SUFFIX@ \
elpa2_test_real_qr@SUFFIX@ \ elpa2_test_real_qr@SUFFIX@ \
elpa2_test_real_api@SUFFIX@ \ elpa2_test_real_api@SUFFIX@ \
elpa2_test_real_banded@SUFFIX@ \
elpa2_test_complex@SUFFIX@ \ elpa2_test_complex@SUFFIX@ \
elpa2_test_complex_default@SUFFIX@ \ elpa2_test_complex_default@SUFFIX@ \
elpa2_test_complex_api@SUFFIX@ \ elpa2_test_complex_api@SUFFIX@ \
elpa2_test_complex_banded@SUFFIX@ \
elpa_driver_real@SUFFIX@ \ elpa_driver_real@SUFFIX@ \
elpa_driver_complex@SUFFIX@ \ elpa_driver_complex@SUFFIX@ \
elpa1_real_toeplitz@SUFFIX@ \ elpa1_real_toeplitz@SUFFIX@ \
...@@ -823,10 +825,10 @@ elpa2_test_real_api@SUFFIX@_LDADD = $(build_lib) ...@@ -823,10 +825,10 @@ elpa2_test_real_api@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_api@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa2_test_real_api@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_elpa2_test_real_api@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa2_test_real_api@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa2_test_real_banded@SUFFIX@_SOURCES = test/Fortran/test_real2_banded.F90 elpa_test_new_interface_real_2stage_banded@SUFFIX@_SOURCES = test/Fortran/test_new_interface_real_2stage_banded.F90
elpa2_test_real_banded@SUFFIX@_LDADD = $(build_lib) elpa_test_new_interface_real_2stage_banded@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa_test_new_interface_real_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_elpa2_test_real_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa_test_new_interface_real_2stage_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_complex@SUFFIX@_SOURCES = test/Fortran/elpa1/complex.F90 elpa1_test_complex@SUFFIX@_SOURCES = test/Fortran/elpa1/complex.F90
elpa1_test_complex@SUFFIX@_LDADD = $(build_lib) elpa1_test_complex@SUFFIX@_LDADD = $(build_lib)
...@@ -848,10 +850,10 @@ elpa2_test_complex_api@SUFFIX@_LDADD = $(build_lib) ...@@ -848,10 +850,10 @@ elpa2_test_complex_api@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_api@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa2_test_complex_api@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_elpa2_test_complex_api@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa2_test_complex_api@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa2_test_complex_banded@SUFFIX@_SOURCES = test/Fortran/test_complex2_banded.F90 elpa_test_new_interface_complex_2stage_banded@SUFFIX@_SOURCES = test/Fortran/test_new_interface_complex_2stage_banded.F90
elpa2_test_complex_banded@SUFFIX@_LDADD = $(build_lib) elpa_test_new_interface_complex_2stage_banded@SUFFIX@_LDADD = $(build_lib)
elpa2_test_complex_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa_test_new_interface_complex_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_elpa2_test_complex_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa_test_new_interface_complex_2stage_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa_driver_real@SUFFIX@_SOURCES = test/Fortran/test_driver_real.F90 elpa_driver_real@SUFFIX@_SOURCES = test/Fortran/test_driver_real.F90
...@@ -869,6 +871,11 @@ elpa_tests@SUFFIX@_LDADD = $(build_lib) ...@@ -869,6 +871,11 @@ elpa_tests@SUFFIX@_LDADD = $(build_lib)
elpa_tests@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa_tests@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
elpa_test_new_interface_real_single_2stage_banded@SUFFIX@_SOURCES = test/Fortran/test_new_interface_real_single_2stage_banded.F90
elpa_test_new_interface_real_single_2stage_banded@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_real_single_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_elpa_test_new_interface_real_single_2stage_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_real_single_precision@SUFFIX@_SOURCES = test/Fortran/elpa1/single_real.F90 elpa1_test_real_single_precision@SUFFIX@_SOURCES = test/Fortran/elpa1/single_real.F90
elpa1_test_real_single_precision@SUFFIX@_LDADD = $(build_lib) elpa1_test_real_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa1_test_real_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa1_test_real_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
...@@ -927,6 +934,12 @@ EXTRA_elpa2_test_real_api_single_precision@SUFFIX@_DEPENDENCIES = test/Fortran/e ...@@ -927,6 +934,12 @@ EXTRA_elpa2_test_real_api_single_precision@SUFFIX@_DEPENDENCIES = test/Fortran/e
endif endif
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
elpa_test_new_interface_complex_single_2stage_banded@SUFFIX@_SOURCES = test/Fortran/test_new_interface_complex_single_2stage_banded.F90
elpa_test_new_interface_complex_single_2stage_banded@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_complex_single_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_elpa_test_new_interface_complex_single_2stage_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_complex_single_precision@SUFFIX@_SOURCES = test/Fortran/elpa1/single_complex.F90 elpa1_test_complex_single_precision@SUFFIX@_SOURCES = test/Fortran/elpa1/single_complex.F90
elpa1_test_complex_single_precision@SUFFIX@_LDADD = $(build_lib) elpa1_test_complex_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa1_test_complex_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules elpa1_test_complex_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
...@@ -1025,6 +1038,8 @@ check_SCRIPTS = \ ...@@ -1025,6 +1038,8 @@ check_SCRIPTS = \
test_real_double_2stage.sh \ test_real_double_2stage.sh \
test_complex_double_1stage.sh \ test_complex_double_1stage.sh \
test_complex_double_2stage.sh \ test_complex_double_2stage.sh \
elpa_test_new_interface_real_2stage_banded@SUFFIX@.sh \
elpa_test_new_interface_complex_2stage_banded@SUFFIX@.sh \
elpa_test_new_interface_real_2stage@SUFFIX@.sh \ elpa_test_new_interface_real_2stage@SUFFIX@.sh \
elpa_test_new_interface_complex_2stage@SUFFIX@.sh \ elpa_test_new_interface_complex_2stage@SUFFIX@.sh \
elpa_test_new_interface_real_1stage@SUFFIX@.sh \ elpa_test_new_interface_real_1stage@SUFFIX@.sh \
...@@ -1034,6 +1049,7 @@ if WANT_SINGLE_PRECISION_REAL ...@@ -1034,6 +1049,7 @@ if WANT_SINGLE_PRECISION_REAL
check_SCRIPTS += \ check_SCRIPTS += \
test_real_single_1stage.sh \ test_real_single_1stage.sh \
test_real_single_2stage.sh \ test_real_single_2stage.sh \
elpa_test_new_interface_real_single_2stage_banded@SUFFIX@.sh \
elpa_test_new_interface_real_single_2stage@SUFFIX@.sh \ elpa_test_new_interface_real_single_2stage@SUFFIX@.sh \
elpa_test_new_interface_real_single_1stage@SUFFIX@.sh elpa_test_new_interface_real_single_1stage@SUFFIX@.sh
endif endif
...@@ -1042,6 +1058,7 @@ if WANT_SINGLE_PRECISION_COMPLEX ...@@ -1042,6 +1058,7 @@ if WANT_SINGLE_PRECISION_COMPLEX
check_SCRIPTS += \ check_SCRIPTS += \
test_complex_single_1stage.sh \ test_complex_single_1stage.sh \
test_complex_single_2stage.sh \ test_complex_single_2stage.sh \
elpa_test_new_interface_complex_single_2stage_banded@SUFFIX@.sh \
elpa_test_new_interface_complex_single_2stage@SUFFIX@.sh \ elpa_test_new_interface_complex_single_2stage@SUFFIX@.sh \
elpa_test_new_interface_complex_single_1stage@SUFFIX@.sh elpa_test_new_interface_complex_single_1stage@SUFFIX@.sh
endif endif
...@@ -1076,9 +1093,7 @@ check_SCRIPTS += \ ...@@ -1076,9 +1093,7 @@ check_SCRIPTS += \
elpa2_test_real_qr@SUFFIX@.sh \ elpa2_test_real_qr@SUFFIX@.sh \
elpa2_test_complex_default@SUFFIX@.sh \ elpa2_test_complex_default@SUFFIX@.sh \
elpa2_test_real_api@SUFFIX@.sh \ elpa2_test_real_api@SUFFIX@.sh \
elpa2_test_real_banded@SUFFIX@.sh \
elpa2_test_complex_api@SUFFIX@.sh \ elpa2_test_complex_api@SUFFIX@.sh \
elpa2_test_complex_banded@SUFFIX@.sh \
elpa_driver_real@SUFFIX@.sh \ elpa_driver_real@SUFFIX@.sh \
elpa_driver_complex@SUFFIX@.sh \ elpa_driver_complex@SUFFIX@.sh \
elpa1_real_toeplitz@SUFFIX@.sh \ elpa1_real_toeplitz@SUFFIX@.sh \
......
...@@ -255,18 +255,18 @@ ...@@ -255,18 +255,18 @@
endif endif
endif endif
bandwidth = -1 if (obj%is_set("bandwidth") == 1) then
if (bandwidth .ne. -1) then nbw=obj%get("bandwidth")
nbw = bandwidth
if ((nbw == 0) .or. (mod(nbw, nblk) .ne. 0)) then if ((nbw == 0) .or. (mod(nbw, nblk) .ne. 0)) then
if (wantDebug) then if (wantDebug) then
write(error_unit,*) "Specified bandwidth has to be a multiple of blocksize" write(error_unit,*) "Specified bandwidth has to be a multiple of blocksize: ",nbw
endif endif
print *, "Specified bandwidth has to be a multiple of blocksize" print *, "Specified bandwidth has to be a multiple of blocksize"
success = .false. success = .false.
return return
endif endif
!ttts = MPI_Wtime()
else else
! Choose bandwidth, must be a multiple of nblk, set to a value >= 32 ! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
...@@ -404,7 +404,7 @@ ...@@ -404,7 +404,7 @@
stop 1 stop 1
endif endif
if( bandwidth == -1) then if(obj%is_set("bandwidth") .ne. 1) then
if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then
! copy to device if we want to continue on GPU ! copy to device if we want to continue on GPU
successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_datatype) successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_datatype)
......
...@@ -121,7 +121,7 @@ static const elpa_index_int_entry_t int_entries[] = { ...@@ -121,7 +121,7 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process"), INT_PARAMETER_ENTRY("local_ncols", "Number of matrix columns stored on this process"),
INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition"), INT_PARAMETER_ENTRY("process_row", "Process row number in the 2D domain decomposition"),
INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition"), INT_PARAMETER_ENTRY("process_col", "Process column number in the 2D domain decomposition"),
INT_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input", -1, INT_ENTRY("bandwidth", "If specified, a band matrix with this bandwidth is expected as input; bandwidth must be multiply of nblk", -1,
NULL, NULL, bw_is_valid, NULL), NULL, NULL, bw_is_valid, NULL),
INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"), INT_ANY_ENTRY("mpi_comm_rows", "Communicator for inter-row communication"),
INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"), INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication"),
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
! !
! !
#include "config-f90.h" #include "config-f90.h"
#include "assert.h"
!> !>
!> Fortran test programm to demonstrates the use of !> Fortran test programm to demonstrates the use of
!> ELPA 2 complex case library. !> ELPA 2 complex case library.
...@@ -64,7 +65,7 @@ ...@@ -64,7 +65,7 @@
!> the environment variable "COMPLEX_ELPA_KERNEL" to an !> the environment variable "COMPLEX_ELPA_KERNEL" to an
!> appropiate value. !> appropiate value.
!> !>
program test_complex2_double_precision program test_complex2_double_banded
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Standard eigenvalue problem - COMPLEX version ! Standard eigenvalue problem - COMPLEX version
...@@ -78,10 +79,8 @@ program test_complex2_double_precision ...@@ -78,10 +79,8 @@ program test_complex2_double_precision
! distributed along with the original code in the file "COPYING". ! distributed along with the original code in the file "COPYING".
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
use precision use precision
use elpa1 use elpa
use elpa2
use mod_check_for_gpu, only : check_for_gpu use mod_check_for_gpu, only : check_for_gpu
use elpa_utilities, only : error_unit
#ifdef WITH_OPENMP #ifdef WITH_OPENMP
use test_util use test_util
...@@ -130,21 +129,20 @@ program test_complex2_double_precision ...@@ -130,21 +129,20 @@ program test_complex2_double_precision
integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level
#endif #endif
type(output_t) :: write_to_file type(output_t) :: write_to_file
logical :: success integer(kind=ik) :: success
character(len=8) :: task_suffix character(len=8) :: task_suffix
integer(kind=ik) :: j integer(kind=ik) :: j
logical :: successELPA
integer(kind=ik) :: numberOfDevices integer(kind=ik) :: numberOfDevices
logical :: gpuAvailable logical :: gpuAvailable
integer(kind=ik) :: global_row, global_col, local_row, local_col integer(kind=ik) :: global_row, global_col, local_row, local_col
integer(kind=ik) :: bandwidth integer(kind=ik) :: bandwidth
class(elpa_t), pointer :: e
#define COMPLEXCASE #define COMPLEXCASE
#define DOUBLE_PRECISION_COMPLEX 1 #define DOUBLE_PRECISION_COMPLEX 1
successELPA = .true.
gpuAvailable = .false. gpuAvailable = .false.
call read_input_parameters_traditional(na, nev, nblk, write_to_file) call read_input_parameters_traditional(na, nev, nblk, write_to_file)
...@@ -182,7 +180,7 @@ program test_complex2_double_precision ...@@ -182,7 +180,7 @@ program test_complex2_double_precision
call timer%enable() call timer%enable()
call timer%start("program: test_complex2_double_precision") call timer%start("program: test_complex2_double_banded")
#endif #endif
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
...@@ -224,16 +222,6 @@ program test_complex2_double_precision ...@@ -224,16 +222,6 @@ program test_complex2_double_precision
print '(a)','| Past BLACS_Gridinfo.' print '(a)','| Past BLACS_Gridinfo.'
end if end if
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes, these are set in elpa_get_communicators.
mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
mpi_comm_rows, mpi_comm_cols)
if (myid==0) then
print '(a)','| Past split communicator setup for rows and columns.'
end if
! Determine the necessary size of the distributed matrices, ! Determine the necessary size of the distributed matrices,
! we use the Scalapack tools routine NUMROC for that. ! we use the Scalapack tools routine NUMROC for that.
...@@ -277,50 +265,53 @@ program test_complex2_double_precision ...@@ -277,50 +265,53 @@ program test_complex2_double_precision
call timer%stop("set up matrix") call timer%stop("set up matrix")
#endif #endif
! set print flag in elpa1 if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then
elpa_print_times = .true. print *, "ELPA API version not supported"
stop 1
!------------------------------------------------------------------------------- endif
! Calculate eigenvalues/eigenvectors #ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_MPI call timer%start("prepare_elpa")
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif #endif
successELPA = elpa_solve_evp_complex_2stage_double(na, nev, a, na_rows, ev, z, na_rows, nblk, & e => elpa_allocate()
na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
call e%set("na", na, success)
if (.not.(successELPA)) then assert_elpa_ok(success)
write(error_unit,*) "solve_evp_complex_2stage produced an error! Aborting..." call e%set("nev", nev, success)
#ifdef WITH_MPI assert_elpa_ok(success)
call MPI_ABORT(mpi_comm_world, 1, mpierr) call e%set("local_nrows", na_rows, success)
assert_elpa_ok(success)
call e%set("local_ncols", na_cols, success)
assert_elpa_ok(success)
call e%set("nblk", nblk, success)
assert_elpa_ok(success)
call e%set("mpi_comm_parent", MPI_COMM_WORLD, success)
assert_elpa_ok(success)
call e%set("process_row", my_prow, success)
assert_elpa_ok(success)
call e%set("process_col", my_pcol, success)
assert_elpa_ok(success)
call e%set("bandwidth", bandwidth, success)
assert_elpa_ok(success)
assert(e%setup() .eq. ELPA_OK)
call e%set("solver", ELPA_SOLVER_2STAGE, success)
assert_elpa_ok(success)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("prepare_elpa")
#endif #endif
endif #ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve")
#endif
call e%solve(a, ev, z, success)
assert_elpa_ok(success)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve")
#endif
call elpa_deallocate(e)
if(myid == 0) print *,'Time transform to tridi :',time_evp_fwd call elpa_uninit()
if(myid == 0) print *,'Time solve tridi :',time_evp_solve
if(myid == 0) print *,'Time transform back EVs :',time_evp_back
if(myid == 0) print *,'Total time (sum above) :',time_evp_back+time_evp_solve+time_evp_fwd
if(write_to_file%eigenvectors) then
write(unit = task_suffix, fmt = '(i8.8)') myid
open(17,file="EVs_complex2_out_task_"//task_suffix(1:8)//".txt",form='formatted',status='new')
write(17,*) "Part of eigenvectors: na_rows=",na_rows,"of na=",na," na_cols=",na_cols," of na=",na
do i=1,na_rows
do j=1,na_cols
write(17,*) "row=",i," col=",j," element of eigenvector=",z(i,j)
enddo
enddo
close(17)
endif
if(write_to_file%eigenvalues) then
if (myid == 0) then
open(17,file="Eigenvalues_complex2_out.txt",form='formatted',status='new')
do i=1,na
write(17,*) i,ev(i)
enddo
close(17)
endif
endif
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Test correctness of result (using plain scalapack routines) ! Test correctness of result (using plain scalapack routines)
......
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
#include "assert.h"
!>
!> Fortran test program demonstrating the use of the
!> ELPA 2 complex case library.
!> If "HAVE_REDIRECT" was defined at build time
!> the stdout and stderr output of each MPI task
!> can be redirected to files if the environment
!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
!> to "true".
!>
!> By calling executable [arg1] [arg2] [arg3] [arg4]
!> one can define the size (arg1), the number of
!> Eigenvectors to compute (arg2), and the blocking (arg3).
!> If these values are not set, default values (4000, 1500, 16)
!> are chosen.
!> If these values are set the 4th argument can be
!> "output", which specifies that the EV's are written to
!> an ascii file.
!>
!> The complex ELPA 2 kernel is set as the default kernel.
!> However, this can be overridden by setting
!> the environment variable "COMPLEX_ELPA_KERNEL" to an
!> appropriate value.
!>
program test_complex2_single_banded
!-------------------------------------------------------------------------------
! Standard eigenvalue problem - COMPLEX version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!-------------------------------------------------------------------------------
use precision
use elpa
use mod_check_for_gpu, only : check_for_gpu
#ifdef WITH_OPENMP
use test_util
#endif
use mod_read_input_parameters
use mod_check_correctness
use mod_setup_mpi
use mod_blacs_infrastructure
use mod_prepare_matrix
use elpa_mpi
#ifdef HAVE_REDIRECT
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
!-------------------------------------------------------------------------------
! Please set system size parameters below!
! na: System size
! nev: Number of eigenvectors to be calculated
! nblk: Blocking factor in block cyclic distribution
!-------------------------------------------------------------------------------
integer(kind=ik) :: nblk
integer(kind=ik) :: na, nev
integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
#ifdef WITH_MPI
integer(kind=ik), external :: numroc
#endif
complex(kind=ck4), parameter :: CZERO = (0.0_rk4,0.0_rk4), CONE = (1.0_rk4,0.0_rk4)
real(kind=rk4), allocatable :: ev(:)
complex(kind=ck4), allocatable :: a(:,:), z(:,:), as(:,:)
integer(kind=ik) :: STATUS
#ifdef WITH_OPENMP
integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level
#endif
type(output_t) :: write_to_file
integer(kind=ik) :: success
character(len=8) :: task_suffix
integer(kind=ik) :: j
integer(kind=ik) :: numberOfDevices
logical :: gpuAvailable
integer(kind=ik) :: global_row, global_col, local_row, local_col
integer(kind=ik) :: bandwidth
class(elpa_t), pointer :: e
#define COMPLEXCASE
#define DOUBLE_PRECISION_COMPLEX 1
gpuAvailable = .false.
call read_input_parameters_traditional(na, nev, nblk, write_to_file)
!-------------------------------------------------------------------------------
! MPI Initialization