Commit 9ec10a93 authored by Andreas Marek's avatar Andreas Marek
Browse files

New interface: test cases for GPU usage

parent c6b13e12
...@@ -481,18 +481,27 @@ noinst_PROGRAMS += \ ...@@ -481,18 +481,27 @@ noinst_PROGRAMS += \
elpa1_test_complex_gpu@SUFFIX@ \ elpa1_test_complex_gpu@SUFFIX@ \
elpa1_test_real_gpu@SUFFIX@ \ elpa1_test_real_gpu@SUFFIX@ \
elpa2_test_complex_gpu@SUFFIX@ \ elpa2_test_complex_gpu@SUFFIX@ \
elpa2_test_real_gpu@SUFFIX@ elpa2_test_real_gpu@SUFFIX@ \
elpa_test_new_interface_complex_1stage_gpu@SUFFIX@ \
elpa_test_new_interface_complex_2stage_gpu@SUFFIX@ \
elpa_test_new_interface_real_1stage_gpu@SUFFIX@ \
elpa_test_new_interface_real_2stage_gpu@SUFFIX@
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
noinst_PROGRAMS += \ noinst_PROGRAMS += \
elpa1_test_real_gpu_single_precision@SUFFIX@ \ elpa1_test_real_gpu_single_precision@SUFFIX@ \
elpa2_test_real_gpu_single_precision@SUFFIX@ elpa2_test_real_gpu_single_precision@SUFFIX@ \
elpa_test_new_interface_real_single_1stage_gpu@SUFFIX@ \
elpa_test_new_interface_real_single_2stage_gpu@SUFFIX@
endif endif
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
noinst_PROGRAMS += \ noinst_PROGRAMS += \
elpa1_test_complex_gpu_single_precision@SUFFIX@ \ elpa1_test_complex_gpu_single_precision@SUFFIX@ \
elpa2_test_complex_gpu_single_precision@SUFFIX@ elpa2_test_complex_gpu_single_precision@SUFFIX@ \
elpa_test_new_interface_complex_single_1stage_gpu@SUFFIX@ \
elpa_test_new_interface_complex_single_2stage_gpu@SUFFIX@
endif endif
endif endif
...@@ -815,6 +824,26 @@ EXTRA_elpa2_test_complex_api_single_precision@SUFFIX@_DEPENDENCIES = test/Fortra ...@@ -815,6 +824,26 @@ EXTRA_elpa2_test_complex_api_single_precision@SUFFIX@_DEPENDENCIES = test/Fortra
endif endif
if WITH_GPU_VERSION if WITH_GPU_VERSION
elpa_test_new_interface_real_1stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_real_1stage_gpu.F90
elpa_test_new_interface_real_1stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_real_1stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_real_1stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa_test_new_interface_complex_1stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_complex_1stage_gpu.F90
elpa_test_new_interface_complex_1stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_complex_1stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_complex_1stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa_test_new_interface_real_2stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_real_2stage_gpu.F90
elpa_test_new_interface_real_2stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_real_2stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_real_2stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa_test_new_interface_complex_2stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_complex_2stage_gpu.F90
elpa_test_new_interface_complex_2stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_complex_2stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_complex_2stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_real_gpu@SUFFIX@_SOURCES = test/Fortran/test_real_gpu.F90 elpa1_test_real_gpu@SUFFIX@_SOURCES = test/Fortran/test_real_gpu.F90
elpa1_test_real_gpu@SUFFIX@_LDADD = $(build_lib) elpa1_test_real_gpu@SUFFIX@_LDADD = $(build_lib)
elpa1_test_real_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa1_test_real_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
...@@ -836,6 +865,16 @@ elpa2_test_complex_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_module ...@@ -836,6 +865,16 @@ elpa2_test_complex_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_module
EXTRA_elpa2_test_complex_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa2_test_complex_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
elpa_test_new_interface_real_single_1stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_real_single_1stage_gpu.F90
elpa_test_new_interface_real_single_1stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_real_single_1stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_real_single_1stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa_test_new_interface_real_single_2stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_real_single_2stage_gpu.F90
elpa_test_new_interface_real_single_2stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_real_single_2stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_real_single_2stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_real_gpu_single_precision@SUFFIX@_SOURCES = test/Fortran/test_real_gpu_single.F90 elpa1_test_real_gpu_single_precision@SUFFIX@_SOURCES = test/Fortran/test_real_gpu_single.F90
elpa1_test_real_gpu_single_precision@SUFFIX@_LDADD = $(build_lib) elpa1_test_real_gpu_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa1_test_real_gpu_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa1_test_real_gpu_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
...@@ -848,6 +887,16 @@ EXTRA_elpa2_test_real_gpu_single_precision@SUFFIX@_DEPENDENCIES = test/Fortran/e ...@@ -848,6 +887,16 @@ EXTRA_elpa2_test_real_gpu_single_precision@SUFFIX@_DEPENDENCIES = test/Fortran/e
endif endif
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
elpa_test_new_interface_complex_single_1stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_complex_single_1stage_gpu.F90
elpa_test_new_interface_complex_single_1stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_complex_single_1stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_complex_single_1stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa_test_new_interface_complex_single_2stage_gpu@SUFFIX@_SOURCES = test/Fortran/test_new_interface_complex_single_2stage_gpu.F90
elpa_test_new_interface_complex_single_2stage_gpu@SUFFIX@_LDADD = $(build_lib)
elpa_test_new_interface_complex_single_2stage_gpu@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa_test_new_interface_complex_single_2stage_gpu@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_complex_gpu_single_precision@SUFFIX@_SOURCES = test/Fortran/test_complex_gpu_single.F90 elpa1_test_complex_gpu_single_precision@SUFFIX@_SOURCES = test/Fortran/test_complex_gpu_single.F90
elpa1_test_complex_gpu_single_precision@SUFFIX@_LDADD = $(build_lib) elpa1_test_complex_gpu_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa1_test_complex_gpu_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa1_test_complex_gpu_single_precision@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
...@@ -932,18 +981,26 @@ check_SCRIPTS += \ ...@@ -932,18 +981,26 @@ check_SCRIPTS += \
elpa1_test_real_gpu@SUFFIX@.sh \ elpa1_test_real_gpu@SUFFIX@.sh \
elpa1_test_complex_gpu@SUFFIX@.sh \ elpa1_test_complex_gpu@SUFFIX@.sh \
elpa2_test_real_gpu@SUFFIX@.sh \ elpa2_test_real_gpu@SUFFIX@.sh \
elpa2_test_complex_gpu@SUFFIX@.sh elpa2_test_complex_gpu@SUFFIX@.sh \
elpa_test_new_interface_complex_1stage_gpu@SUFFIX@.sh \
elpa_test_new_interface_complex_2stage_gpu@SUFFIX@.sh \
elpa_test_new_interface_real_1stage_gpu@SUFFIX@.sh \
elpa_test_new_interface_real_2stage_gpu@SUFFIX@.sh
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
check_SCRIPTS += \ check_SCRIPTS += \
elpa1_test_real_gpu_single_precision@SUFFIX@.sh \ elpa1_test_real_gpu_single_precision@SUFFIX@.sh \
elpa2_test_real_gpu_single_precision@SUFFIX@.sh elpa2_test_real_gpu_single_precision@SUFFIX@.sh \
elpa_test_new_interface_real_single_1stage_gpu@SUFFIX@.sh \
elpa_test_new_interface_real_single_2stage_gpu@SUFFIX@.sh
endif endif
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
check_SCRIPTS += \ check_SCRIPTS += \
elpa1_test_complex_gpu_single_precision@SUFFIX@.sh \ elpa1_test_complex_gpu_single_precision@SUFFIX@.sh \
elpa2_test_complex_gpu_single_precision@SUFFIX@.sh elpa2_test_complex_gpu_single_precision@SUFFIX@.sh \
elpa_test_new_interface_complex_single_1stage_gpu@SUFFIX@.sh \
elpa_test_new_interface_complex_single_2stage_gpu@SUFFIX@.sh
endif endif
endif endif
# test scripts # test scripts
......
...@@ -58,8 +58,8 @@ function elpa_solve_evp_& ...@@ -58,8 +58,8 @@ function elpa_solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_1stage_& &_1stage_&
&PRECISION& &PRECISION&
&_new (na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, & &_new ( na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
useGPU) result(success) mpi_comm_cols, mpi_comm_all, useGPU) result(success)
use precision use precision
use cuda_functions use cuda_functions
use mod_check_for_gpu use mod_check_for_gpu
...@@ -101,7 +101,7 @@ function elpa_solve_evp_& ...@@ -101,7 +101,7 @@ function elpa_solve_evp_&
integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */ #endif /* COMPLEXCASE */
logical, intent(in), optional :: useGPU logical, intent(in) :: useGPU
logical :: success logical :: success
logical :: do_useGPU logical :: do_useGPU
...@@ -146,12 +146,29 @@ function elpa_solve_evp_& ...@@ -146,12 +146,29 @@ function elpa_solve_evp_&
do_useGPU = .false. do_useGPU = .false.
if (present(useGPU)) then
! user defined GPU usage via the optional argument in the API call if (useGPU) then
if (useGPU) then if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
do_useGPU = .true.
! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
cudaHostRegisterPortable = cuda_hostRegisterPortable()
cudaHostRegisterMapped = cuda_hostRegisterMapped()
else
print *,"GPUs are requested but not detected! Aborting..."
success = .false.
return
endif
else
! check whether set by environment variable
do_useGPU = gpu_usage_via_environment_variable()
if (do_useGPU) then
if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
do_useGPU = .true.
! set the necessary parameters ! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice() cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost() cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
...@@ -164,9 +181,6 @@ function elpa_solve_evp_& ...@@ -164,9 +181,6 @@ function elpa_solve_evp_&
return return
endif endif
endif endif
else
! check whether set by environment variable
do_useGPU = gpu_usage_via_environment_variable()
endif endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
......
...@@ -55,13 +55,14 @@ ...@@ -55,13 +55,14 @@
&2stage_& &2stage_&
&PRECISION& &PRECISION&
&_new (na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, & &_new (na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
useGPU, &
#if REALCASE == 1 #if REALCASE == 1
THIS_ELPA_KERNEL_API, useQR, & THIS_ELPA_KERNEL_API, useQR &
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
THIS_ELPA_KERNEL_API, & THIS_ELPA_KERNEL_API &
#endif #endif
useGPU) result(success) &) result(success)
#ifdef HAVE_DETAILED_TIMINGS #ifdef HAVE_DETAILED_TIMINGS
use timings use timings
...@@ -76,7 +77,7 @@ ...@@ -76,7 +77,7 @@
use mod_check_for_gpu use mod_check_for_gpu
use iso_c_binding use iso_c_binding
implicit none implicit none
logical, intent(in), optional :: useGPU logical, intent(in) :: useGPU
#if REALCASE == 1 #if REALCASE == 1
logical, intent(in), optional :: useQR logical, intent(in), optional :: useQR
#endif #endif
...@@ -176,11 +177,27 @@ ...@@ -176,11 +177,27 @@
endif endif
#endif /* REALCASE */ #endif /* REALCASE */
if (present(useGPU)) then if (useGPU) then
if (useGPU) then if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
do_useGPU = .true. do_useGPU = .true.
! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
cudaHostRegisterPortable = cuda_hostRegisterPortable()
cudaHostRegisterMapped = cuda_hostRegisterMapped()
else
print *,"GPUs are requested but not detected! Aborting..."
success = .false.
return
endif
else
! check whether set by environment variable
do_useGPU = gpu_usage_via_environment_variable()
if (do_useGPU) then
if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
! set the necessary parameters ! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice() cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
...@@ -194,10 +211,7 @@ ...@@ -194,10 +211,7 @@
return return
endif endif
endif endif
else endif
! check whether set by environment variable
do_useGPU = gpu_usage_via_environment_variable()
endif
if (present(THIS_ELPA_KERNEL_API)) then if (present(THIS_ELPA_KERNEL_API)) then
! user defined kernel via the optional argument in the API call ! user defined kernel via the optional argument in the API call
......
...@@ -264,6 +264,19 @@ module elpa_type ...@@ -264,6 +264,19 @@ module elpa_type
integer(kind=c_int) :: success_internal integer(kind=c_int) :: success_internal
logical :: success_l logical :: success_l
logical :: useGPU
if (self%get("gpu",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then
print *,"Could not querry gpu"
stop
endif
useGPU = .true.
else
useGPU = .false.
endif
if (self%get("solver",success_internal) .eq. 1) then if (self%get("solver",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
print *,"Could not querry solver" print *,"Could not querry solver"
...@@ -272,7 +285,7 @@ module elpa_type ...@@ -272,7 +285,7 @@ module elpa_type
success_l = elpa_solve_evp_real_1stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_real_1stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else if (self%get("solver",success_internal) .eq. 2) then else if (self%get("solver",success_internal) .eq. 2) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
...@@ -282,7 +295,7 @@ module elpa_type ...@@ -282,7 +295,7 @@ module elpa_type
success_l = elpa_solve_evp_real_2stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_real_2stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else else
print *,"unknown solver" print *,"unknown solver"
stop stop
...@@ -315,7 +328,21 @@ module elpa_type ...@@ -315,7 +328,21 @@ module elpa_type
integer(kind=c_int) :: success_internal integer(kind=c_int) :: success_internal
logical :: success_l logical :: success_l
logical :: useGPU
#ifdef WANT_SINGLE_PRECISION_REAL #ifdef WANT_SINGLE_PRECISION_REAL
if (self%get("gpu",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then
print *,"Could not querry gpu"
stop
endif
useGPU = .true.
else
useGPU = .false.
endif
if (self%get("solver",success_internal) .eq. 1) then if (self%get("solver",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
print *,"Could not querry solver" print *,"Could not querry solver"
...@@ -324,7 +351,7 @@ module elpa_type ...@@ -324,7 +351,7 @@ module elpa_type
success_l = elpa_solve_evp_real_1stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_real_1stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else if (self%get("solver",success_internal) .eq. 2) then else if (self%get("solver",success_internal) .eq. 2) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
...@@ -334,7 +361,7 @@ module elpa_type ...@@ -334,7 +361,7 @@ module elpa_type
success_l = elpa_solve_evp_real_2stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_real_2stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else else
print *,"unknown solver" print *,"unknown solver"
stop stop
...@@ -372,7 +399,19 @@ module elpa_type ...@@ -372,7 +399,19 @@ module elpa_type
integer(kind=c_int) :: success_internal integer(kind=c_int) :: success_internal
logical :: success_l logical :: success_l
logical :: useGPU
if (self%get("gpu",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then
print *,"Could not querry gpu"
stop
endif
useGPU = .true.
else
useGPU = .false.
endif
if (self%get("solver",success_internal) .eq. 1) then if (self%get("solver",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
print *,"Could not querry solver" print *,"Could not querry solver"
...@@ -381,7 +420,7 @@ module elpa_type ...@@ -381,7 +420,7 @@ module elpa_type
success_l = elpa_solve_evp_complex_1stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_complex_1stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else if (self%get("solver",success_internal) .eq. 2) then else if (self%get("solver",success_internal) .eq. 2) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
...@@ -391,7 +430,7 @@ module elpa_type ...@@ -391,7 +430,7 @@ module elpa_type
success_l = elpa_solve_evp_complex_2stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_complex_2stage_double_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else else
print *,"unknown solver" print *,"unknown solver"
stop stop
...@@ -426,7 +465,20 @@ module elpa_type ...@@ -426,7 +465,20 @@ module elpa_type
integer(kind=c_int) :: success_internal integer(kind=c_int) :: success_internal
logical :: success_l logical :: success_l
logical :: useGPU
#ifdef WANT_SINGLE_PRECISION_COMPLEX #ifdef WANT_SINGLE_PRECISION_COMPLEX
if (self%get("gpu",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then
print *,"Could not querry gpu"
stop
endif
useGPU = .true.
else
useGPU = .false.
endif
if (self%get("solver",success_internal) .eq. 1) then if (self%get("solver",success_internal) .eq. 1) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
print *,"Could not querry solver" print *,"Could not querry solver"
...@@ -435,7 +487,7 @@ module elpa_type ...@@ -435,7 +487,7 @@ module elpa_type
success_l = elpa_solve_evp_complex_1stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_complex_1stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else if (self%get("solver",success_internal) .eq. 2) then else if (self%get("solver",success_internal) .eq. 2) then
if (success_internal .ne. ELPA_OK) then if (success_internal .ne. ELPA_OK) then
...@@ -445,7 +497,7 @@ module elpa_type ...@@ -445,7 +497,7 @@ module elpa_type
success_l = elpa_solve_evp_complex_2stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, & success_l = elpa_solve_evp_complex_2stage_single_new(self%na, self%nev, a, self%local_nrows, ev, q, &
self%local_nrows, self%nblk, self%local_ncols, & self%local_nrows, self%nblk, self%local_ncols, &
self%mpi_comm_rows, self%mpi_comm_cols, & self%mpi_comm_rows, self%mpi_comm_cols, &
self%mpi_comm_parent) self%mpi_comm_parent, useGPU)
else else
print *,"unknown solver" print *,"unknown solver"
stop stop
......
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
#define stringify_(x) "x"
#define stringify(x) stringify_(x)
#define assert(x) call x_assert(x, stringify(x), __FILE__, __LINE__)
program test_interface
use precision
use assert
use mod_setup_mpi
use elpa_mpi
use elpa_type
use mod_prepare_matrix
use mod_read_input_parameters
use mod_blacs_infrastructure
use mod_check_correctness
implicit none
! matrix dimensions
integer :: na, nev, nblk
! mpi
integer :: myid, nprocs
integer :: na_cols, na_rows ! local matrix size
integer :: np_cols, np_rows ! number of MPI processes per column/row
integer :: my_prow, my_pcol ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1)
integer :: mpierr