Commit 1a291901 authored by Andreas Marek

Make ELPA 1stage and 2stage calls consistent: introduce useGPU also in ELPA 2stage

parent 690de1d9
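In short, the 2stage driver routines gain a trailing useGPU argument (1 = yes, 0 = no), matching the 1stage interface. A minimal C sketch of the new call follows; the solver prototype, the kernel constant ELPA2_REAL_KERNEL_GENERIC, and the flag semantics are taken from this diff, while the header path elpa/elpa.h and the helper function name are illustrative assumptions, and all arrays and communicators are assumed to be distributed and initialized as in the C test programs further down.

#include <elpa/elpa.h>   /* assumed install path of the ELPA C headers */

/* Sketch: real double-precision 2stage solve with the new trailing
 * useGPU flag (1 = yes, 0 = no). lda/ldq are na_rows and matrixCols
 * is na_cols, as in the C test program in this commit. */
int solve_real_2stage(int na, int nev, double *a, double *ev, double *z,
                      int na_rows, int na_cols, int nblk,
                      int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all)
{
    int useQR  = 0;   /* QR decomposition: 1 = yes, 0 = no */
    int useGPU = 0;   /* new argument introduced by this commit */
    return elpa_solve_evp_real_2stage_double_precision(
               na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols,
               mpi_comm_rows, mpi_comm_cols, mpi_comm_all,
               ELPA2_REAL_KERNEL_GENERIC, useQR, useGPU);
}

The flag is passed positionally, mirroring the C prototype; the Fortran wrappers convert it to a logical before forwarding to the 2stage solvers.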
@@ -254,15 +254,15 @@ module ELPA
endif
if (useELPA1) then
success = elpa_solve_evp_real_1stage_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
useGPU = useGPU)
success = elpa_solve_evp_real_1stage_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
useGPU)
else
success = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, &
success = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API = THIS_REAL_ELPA_KERNEL_API, &
useQR = useQR)
THIS_REAL_ELPA_KERNEL_API, &
useQR, useGPU)
endif
end function elpa_solve_evp_real_double
@@ -356,13 +356,13 @@ module ELPA
if (useELPA1) then
success = elpa_solve_evp_real_1stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
useGPU = useGPU)
useGPU)
else
success = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API = THIS_REAL_ELPA_KERNEL_API, &
useQR = useQR)
THIS_REAL_ELPA_KERNEL_API, &
useQR, useGPU)
endif
end function elpa_solve_evp_real_single
@@ -460,7 +460,7 @@ module ELPA
success = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API = THIS_COMPLEX_ELPA_KERNEL_API)
THIS_COMPLEX_ELPA_KERNEL_API, useGPU)
endif
end function elpa_solve_evp_complex_double
@@ -557,7 +557,7 @@ module ELPA
success = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API = THIS_COMPLEX_ELPA_KERNEL_API)
THIS_COMPLEX_ELPA_KERNEL_API, useGPU)
endif
end function elpa_solve_evp_complex_single
@@ -471,26 +471,27 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_REAL_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param use_qr use QR decomposition 1 = yes, 0 = no
!c> * \param useQR use QR decomposition 1 = yes, 0 = no
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> *
!c> * \result int: 1 if successful, otherwise 0
!c> */
#define DOUBLE_PRECISION_REAL 1
#ifdef DOUBLE_PRECISION_REAL
!c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
!c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
#else
!c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
!c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
#endif
#ifdef DOUBLE_PRECISION_REAL
function solve_elpa2_evp_real_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQR) &
THIS_REAL_ELPA_KERNEL_API, useQR, useGPU) &
result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
#else
function solve_elpa2_evp_real_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQR) &
THIS_REAL_ELPA_KERNEL_API, useQR, useGPU) &
result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
result(success) bind(C,name="elpa_solve_evp_real_2stage_single_precision")
@@ -502,7 +503,7 @@
integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR
integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR, useGPU
#ifdef DOUBLE_PRECISION_REAL
real(kind=c_double) :: ev(1:na)
#ifdef USE_ASSUMED_SIZE
@@ -533,11 +534,11 @@
#ifdef DOUBLE_PRECISION_REAL
successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQRFortran)
THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
#else
successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQRFortran)
THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
#endif
if (successFortran) then
success = 1
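Note the conversion above: the wrapper forwards the C int as the logical expression useGPU == 1, so only the exact value 1 selects the GPU path; any other nonzero value silently yields .false.. A hedged one-liner for C callers that want shell-style truthiness (illustrative, not part of this commit):

/* Illustrative helper, not part of this commit: normalize an arbitrary
 * truthy int to the exact 0/1 that the Fortran wrapper's (useGPU == 1)
 * test expects. */
static int elpa_flag(int value)
{
    return value != 0 ? 1 : 0;
}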
@@ -570,26 +571,27 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_REAL_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param use_qr use QR decomposition 1 = yes, 0 = no
!c> * \param useQR use QR decomposition 1 = yes, 0 = no
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> *
!c> * \result int: 1 if successful, otherwise 0
!c> */
#undef DOUBLE_PRECISION_REAL
#ifdef DOUBLE_PRECISION_REAL
!c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
!c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
#else
!c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
!c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
#endif
#ifdef DOUBLE_PRECISION_REAL
function solve_elpa2_evp_real_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQR) &
THIS_REAL_ELPA_KERNEL_API, useQR, useGPU) &
result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
#else
function solve_elpa2_evp_real_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQR) &
THIS_REAL_ELPA_KERNEL_API, useQR, useGPU) &
result(success) bind(C,name="elpa_solve_evp_real_2stage_single_precision")
#endif
use, intrinsic :: iso_c_binding
@@ -599,7 +601,7 @@
integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR
integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR, useGPU
#ifdef DOUBLE_PRECISION_REAL
real(kind=c_double) :: ev(1:na)
#ifdef USE_ASSUMED_SIZE
@@ -629,11 +631,11 @@
#ifdef DOUBLE_PRECISION_REAL
successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQRFortran)
THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
#else
successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQRFortran)
THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
#endif
if (successFortran) then
success = 1
@@ -666,26 +668,27 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_COMPLEX_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> *
!c> * \result int: 1 if successful, otherwise 0
!c> */
#define DOUBLE_PRECISION_COMPLEX 1
#ifdef DOUBLE_PRECISION_COMPLEX
!c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
!c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
#else
!c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
!c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
function solve_elpa2_evp_complex_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API) &
THIS_COMPLEX_ELPA_KERNEL_API, useGPU) &
result(success) bind(C,name="elpa_solve_evp_complex_2stage_double_precision")
#else
function solve_elpa2_evp_complex_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API) &
THIS_COMPLEX_ELPA_KERNEL_API, useGPU) &
result(success) bind(C,name="elpa_solve_evp_complex_2stage_single_precision")
#endif
@@ -696,7 +699,7 @@
integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API
integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API, useGPU
#ifdef DOUBLE_PRECISION_COMPLEX
real(kind=c_double) :: ev(1:na)
#ifdef USE_ASSUMED_SIZE
@@ -719,11 +722,11 @@
#ifdef DOUBLE_PRECISION_COMPLEX
successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API)
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
#else
successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API)
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
#endif
if (successFortran) then
success = 1
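The complex wrappers mirror the real ones but drop the useQR flag. A minimal sketch of the corresponding C call; the solver prototype and ELPA2_COMPLEX_KERNEL_GENERIC come from this diff, the function name solve_complex_2stage is illustrative, and buffers/communicators are assumed to be set up as in the complex C test program below.

#include <complex.h>   /* C99 double complex */

/* Sketch: complex double-precision 2stage solve with the new trailing
 * useGPU flag; eigenvalues ev are real even for complex matrices. */
int solve_complex_2stage(int na, int nev, double complex *a, double *ev,
                         double complex *z, int na_rows, int na_cols,
                         int nblk, int mpi_comm_rows, int mpi_comm_cols,
                         int mpi_comm_all, int useGPU)
{
    return elpa_solve_evp_complex_2stage_double_precision(
               na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols,
               mpi_comm_rows, mpi_comm_cols, mpi_comm_all,
               ELPA2_COMPLEX_KERNEL_GENERIC, useGPU);
}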
@@ -756,27 +759,27 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_COMPLEX_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param use_qr use QR decomposition 1 = yes, 0 = no
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> *
!c> * \result int: 1 if successful, otherwise 0
!c> */
#undef DOUBLE_PRECISION_COMPLEX
#ifdef DOUBLE_PRECISION_COMPLEX
!c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
!c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
#else
!c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
!c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
function solve_elpa2_evp_complex_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API) &
THIS_COMPLEX_ELPA_KERNEL_API, useGPU) &
result(success) bind(C,name="elpa_solve_evp_complex_2stage_double_precision")
#else
function solve_elpa2_evp_complex_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API) &
THIS_COMPLEX_ELPA_KERNEL_API, useGPU) &
result(success) bind(C,name="elpa_solve_evp_complex_2stage_single_precision")
#endif
@@ -787,7 +790,7 @@
integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API
integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API, useGPU
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_double) :: ev(1:na)
@@ -800,11 +803,11 @@
#ifdef DOUBLE_PRECISION_COMPLEX
successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API)
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
#else
successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API)
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
#endif
if (successFortran) then
success = 1
@@ -836,7 +839,8 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_REAL_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param use_qr use QR decomposition 1 = yes, 0 = no
!c> * \param useQR use QR decomposition 1 = yes, 0 = no
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> * \param method choose whether to use ELPA 1stage or 2stage solver
!c> * possible values: "1stage" => use ELPA 1stage solver
!c> * "2stage" => use ELPA 2stage solver
@@ -924,7 +928,8 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_REAL_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param use_qr use QR decomposition 1 = yes, 0 = no
!c> * \param useQR use QR decomposition 1 = yes, 0 = no
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> * \param method choose whether to use ELPA 1stage or 2stage solver
!c> * possible values: "1stage" => use ELPA 1stage solver
!c> * "2stage" => use ELPA 2stage solver
@@ -1012,6 +1017,7 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_COMPLEX_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> * \param method choose whether to use ELPA 1stage or 2stage solver
!c> * possible values: "1stage" => use ELPA 1stage solver
!c> * "2stage" => use ELPA 2stage solver
@@ -1092,6 +1098,7 @@
!c> * \param mpi_comm_cols MPI-Communicator for columns
!c> * \param mpi_comm_all MPI communicator for the total processor set
!c> * \param THIS_COMPLEX_ELPA_KERNEL_API specify used ELPA2 kernel via API
!c> * \param useGPU use GPU (1=yes, 0=no)
!c> * \param method choose whether to use ELPA 1stage or 2stage solver
!c> * possible values: "1stage" => use ELPA 1stage solver
!c> * "2stage" => use ELPA 2stage solver
@@ -92,7 +92,7 @@ int main(int argc, char** argv) {
int success;
int THIS_COMPLEX_ELPA_KERNEL_API;
int THIS_COMPLEX_ELPA_KERNEL_API, useGPU;
#ifdef WITH_MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
@@ -229,11 +229,12 @@ int main(int argc, char** argv) {
#ifdef WITH_MPI
mpierr = MPI_Barrier(MPI_COMM_WORLD);
#endif
useGPU = 0;
THIS_COMPLEX_ELPA_KERNEL_API = ELPA2_COMPLEX_KERNEL_GENERIC;
#ifdef DOUBLE_PRECISION_COMPLEX
success = elpa_solve_evp_complex_2stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API);
success = elpa_solve_evp_complex_2stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API, useGPU);
#else
success = elpa_solve_evp_complex_2stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API);
success = elpa_solve_evp_complex_2stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_COMPLEX_ELPA_KERNEL_API, useGPU);
#endif
if (success != 1) {
@@ -87,7 +87,7 @@ int main(int argc, char** argv) {
int success;
int useQr, THIS_REAL_ELPA_KERNEL_API;
int useQr, THIS_REAL_ELPA_KERNEL_API, useGPU;
#ifdef WITH_MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
@@ -215,12 +215,13 @@ int main(int argc, char** argv) {
#ifdef WITH_MPI
mpierr = MPI_Barrier(MPI_COMM_WORLD);
#endif
useGPU = 0;
useQr = 0;
THIS_REAL_ELPA_KERNEL_API = ELPA2_REAL_KERNEL_GENERIC;
#ifdef DOUBLE_PRECISION_REAL
success = elpa_solve_evp_real_2stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr);
success = elpa_solve_evp_real_2stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr, useGPU);
#else
success = elpa_solve_evp_real_2stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr);
success = elpa_solve_evp_real_2stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols, my_mpi_comm_world, THIS_REAL_ELPA_KERNEL_API, useQr, useGPU);
#endif
if (success != 1) {
printf("error in ELPA solve \n");
@@ -140,6 +140,7 @@ program test_complex2_gpu_version_double_precision
type(output_t) :: write_to_file
character(len=8) :: task_suffix
integer(kind=ik) :: j
logical :: useGPU
#define DOUBLE_PRECISION_COMPLEX 1
@@ -291,9 +292,11 @@ program test_complex2_gpu_version_double_precision
#ifdef WITH_MPI
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif
useGPU = .true.
successELPA = elpa_solve_evp_complex_2stage_double(na, nev, a, na_rows, ev, z, na_rows, nblk, &
na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
COMPLEX_ELPA_KERNEL_GPU)
COMPLEX_ELPA_KERNEL_GPU, useGPU)
if (.not.(successELPA)) then
@@ -140,6 +140,7 @@ program test_complex2_gpu_version_single_precision
type(output_t) :: write_to_file
character(len=8) :: task_suffix
integer(kind=ik) :: j
logical :: useGPU
#define DOUBLE_PRECISION_COMPLEX 1
@@ -291,9 +292,10 @@ program test_complex2_gpu_version_single_precision
#ifdef WITH_MPI
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif
useGPU = .true.
successELPA = elpa_solve_evp_complex_2stage_single(na, nev, a, na_rows, ev, z, na_rows, nblk, &
na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
COMPLEX_ELPA_KERNEL_GPU)
COMPLEX_ELPA_KERNEL_GPU, useGPU)
if (.not.(successELPA)) then
@@ -136,6 +136,7 @@ program test_real2_gpu_version_double_precision
type(output_t) :: write_to_file
character(len=8) :: task_suffix
integer(kind=ik) :: j
logical :: useGPU
#define DOUBLE_PRECISION_REAL 1
@@ -283,9 +284,10 @@ program test_real2_gpu_version_double_precision
#ifdef WITH_MPI
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif
useGPU = .true.
successELPA = elpa_solve_evp_real_2stage_double(na, nev, a, na_rows, ev, z, na_rows, nblk, &
na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
REAL_ELPA_KERNEL_GPU)
REAL_ELPA_KERNEL_GPU, useGPU=useGPU)
if (.not.(successELPA)) then
@@ -136,6 +136,7 @@ program test_real2_gpu_version_single_precision
type(output_t) :: write_to_file
character(len=8) :: task_suffix
integer(kind=ik) :: j
logical :: useGPU
#define DOUBLE_PRECISION_REAL 1
@@ -283,9 +284,10 @@ program test_real2_gpu_version_single_precision
#ifdef WITH_MPI
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif
useGPU = .true.
successELPA = elpa_solve_evp_real_2stage_single(na, nev, a, na_rows, ev, z, na_rows, nblk, &
na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world, &
REAL_ELPA_KERNEL_GPU)
REAL_ELPA_KERNEL_GPU, useGPU=useGPU)
if (.not.(successELPA)) then