diff --git a/man/elpa_solve_evp_complex_2stage_double.3 b/man/elpa_solve_evp_complex_2stage_double.3
index e4aa372be1228cf17351d20507d12b6b4e541f89..10fcce4292394c56cfc7ef885e52777dbdf2c1d7 100644
--- a/man/elpa_solve_evp_complex_2stage_double.3
+++ b/man/elpa_solve_evp_complex_2stage_double.3
@@ -92,7 +92,7 @@ use elpa2
 .br
 .RI "int \fBuseGPU\fP: decide whether GPUs should be used or not"
 .br
-.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix"
+.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix (-1 = matrix is not banded)"
 .br
 .RI "int \fBsuccess\fP: return value indicating success (1) or failure (0)
diff --git a/man/elpa_solve_evp_complex_2stage_single.3 b/man/elpa_solve_evp_complex_2stage_single.3
index 30d87cef1676391419f41f4b0f6c8ed2d2494806..e5bf5ab0ddc8661e8fd73be9de9dd7e03c86f754 100644
--- a/man/elpa_solve_evp_complex_2stage_single.3
+++ b/man/elpa_solve_evp_complex_2stage_single.3
@@ -92,7 +92,7 @@ use elpa2
 .br
 .RI "int \fBuseGPU\fP: decide whether GPUs should be used or not"
 .br
-.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix"
+.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix (-1 = matrix is not banded)"
 .br
 .RI "int \fBsuccess\fP: return value indicating success (1) or failure (0)
diff --git a/man/elpa_solve_evp_real_2stage_double.3 b/man/elpa_solve_evp_real_2stage_double.3
index 66d80200444573492789a238dc9107e3405d9c62..86ad57cf50215d11b674bbd11c97ac9d5e836cb0 100644
--- a/man/elpa_solve_evp_real_2stage_double.3
+++ b/man/elpa_solve_evp_real_2stage_double.3
@@ -94,7 +94,7 @@ use elpa2
 .br
 .RI "int \fBuseGPU\fP: decide whether GPUs should be used or not"
 .br
-.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix"
+.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix (-1 = matrix is not banded)"
 .br
 .RI "int \fBsuccess\fP: return value indicating success (1) or failure (0)
diff --git a/man/elpa_solve_evp_real_2stage_single.3 b/man/elpa_solve_evp_real_2stage_single.3
index 2d1bc5825db24e12138f3e8cc781deb5f0d91fb3..641f5890cc85adb883ba02230ae8943ee469e52a 100644
--- a/man/elpa_solve_evp_real_2stage_single.3
+++ b/man/elpa_solve_evp_real_2stage_single.3
@@ -49,7 +49,7 @@ use elpa2
 .br
 .RI "logical, intent(in), optional: \fBuseGPU\fP: decide whether GPUs should be used or not"
 .br
-.RI "integer, intent(in), optional: \fBbandwidth\fP: bandwidth of an already banded matrix"
+.RI "integer, intent(in), optional: \fBbandwidth\fP: bandwidth of an already banded matrix (present only if the matrix is banded)"
 .br
 .RI "logical \fBsuccess\fP: return value indicating success or failure"
 .br
@@ -94,7 +94,7 @@ use elpa2
 .br
 .RI "int \fBuseGPU\fP: decide whether GPUs should be used or not"
 .br
-.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix"
+.RI "int \fBbandwidth\fP: bandwidth of an already banded matrix (-1 = matrix is not banded)"
 .br
 .RI "int \fBsuccess\fP: return value indicating success (1) or failure (0)
diff --git a/src/elpa_c_interface.F90 b/src/elpa_c_interface.F90
index 9caecea6198d7b628c84093f6daf9fd9b9685871..fafb792123e599c35d1874dcd44aa8417e6c8025 100644
--- a/src/elpa_c_interface.F90
+++ b/src/elpa_c_interface.F90
@@ -141,7 +141,7 @@
   !c> * \param matrixCols           distributed number of matrix columns
   !c> * \param mpi_comm_rows        MPI-Communicator for rows
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c>*/
@@ -229,7 +229,7 @@
   !c> * \param matrixCols           distributed number of matrix columns
   !c> * \param mpi_comm_rows        MPI-Communicator for rows
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c>*/
@@ -304,7 +304,7 @@
   !c> * \param matrixCols           distributed number of matrix columns
   !c> * \param mpi_comm_rows        MPI-Communicator for rows
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c> */
@@ -391,7 +391,7 @@
   !c> * \param matrixCols           distributed number of matrix columns
   !c> * \param mpi_comm_rows        MPI-Communicator for rows
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c> */
@@ -471,27 +471,28 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_REAL_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \param useQR use QR decomposition 1 = yes, 0 = no
-  !c> * \parmam useGPU use GPU (1=yes, 0=No)
+  !c> * \param useQR                use QR decomposition 1 = yes, 0 = no
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
+  !c> * \param bandwidth            bandwidth of already banded matrix (-1 = No banding)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c> */
 #define DOUBLE_PRECISION_REAL 1
 #ifdef DOUBLE_PRECISION_REAL
-  !c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
+  !c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU, int bandwidth);
 #else
-  !c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
+  !c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU, int bandwidth);
 #endif
 #ifdef DOUBLE_PRECISION_REAL
-  function solve_elpa2_evp_real_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,                  &
+  function solve_elpa2_evp_real_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,                 &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU)                        &
+                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU, bandwidth)             &
                                   result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
 #else
-  function solve_elpa2_evp_real_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,                  &
+  function solve_elpa2_evp_real_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,                 &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU)                        &
+                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU, bandwidth)             &
                                   result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
                                   result(success) bind(C,name="elpa_solve_evp_real_2stage_single_precision")
@@ -503,7 +504,7 @@
     integer(kind=c_int)                    :: success
     integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
                                               mpi_comm_all
-    integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR, useGPU
+    integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR, useGPU, bandwidth
 #ifdef DOUBLE_PRECISION_REAL
     real(kind=c_double)                    :: ev(1:na)
 #ifdef USE_ASSUMED_SIZE
@@ -531,15 +532,32 @@
       useQRFortran = .true.
     endif
 
+    if (bandwidth .eq. -1) then
+      ! no banded matrix
+
 #ifdef DOUBLE_PRECISION_REAL
-    successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
-                                                       mpi_comm_cols, mpi_comm_all,                                  &
-                                                       THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
+      successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
 #else
-    successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
-                                                       mpi_comm_cols, mpi_comm_all,                                  &
-                                                       THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
+      successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
 #endif
+    else
+      ! bandwidth given
+#ifdef DOUBLE_PRECISION_REAL
+      successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1, bandwidth)
+#else
+      successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1, bandwidth)
+#endif
+
+    endif
+
     if (successFortran) then
       success = 1
     else
@@ -571,27 +589,28 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_REAL_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \param useQR use QR decomposition 1 = yes, 0 = no
-  !c> * \parmam useGPU use GPU (1=yes, 0=No)
+  !c> * \param useQR                use QR decomposition 1 = yes, 0 = no
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
+  !c> * \param bandwidth            bandwidth of already banded matrix (-1 = No banding)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c> */
 #undef DOUBLE_PRECISION_REAL
 #ifdef DOUBLE_PRECISION_REAL
-  !c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
+  !c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU, int bandwidth);
 #else
-  !c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU);
+  !c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR, int useGPU, int bandwidth);
 #endif
 #ifdef DOUBLE_PRECISION_REAL
-  function solve_elpa2_evp_real_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,                  &
+  function solve_elpa2_evp_real_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,                 &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU)                        &
+                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU, bandwidth)             &
                                   result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
 #else
-  function solve_elpa2_evp_real_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,                  &
+  function solve_elpa2_evp_real_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,                 &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU)                        &
+                                  THIS_REAL_ELPA_KERNEL_API, useQR, useGPU, bandwidth)             &
                                   result(success) bind(C,name="elpa_solve_evp_real_2stage_single_precision")
 #endif
   use, intrinsic :: iso_c_binding
@@ -601,7 +620,7 @@
     integer(kind=c_int)                    :: success
     integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
                                               mpi_comm_all
-    integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR, useGPU
+    integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR, useGPU, bandwidth
 #ifdef DOUBLE_PRECISION_REAL
     real(kind=c_double)                    :: ev(1:na)
 #ifdef USE_ASSUMED_SIZE
@@ -628,15 +647,30 @@
       useQRFortran = .true.
     endif
 
+    if (bandwidth .eq. -1) then
+      ! matrix is not banded
 #ifdef DOUBLE_PRECISION_REAL
-    successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
-                                                       mpi_comm_cols, mpi_comm_all,                                  &
-                                                       THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
+      successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
 #else
-    successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
-                                                       mpi_comm_cols, mpi_comm_all,                                  &
-                                                       THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
+      successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1)
 #endif
+    else
+      ! bandwidth is given
+#ifdef DOUBLE_PRECISION_REAL
+      successFortran = elpa_solve_evp_real_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1, bandwidth)
+#else
+      successFortran = elpa_solve_evp_real_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+                                                         mpi_comm_cols, mpi_comm_all,                                  &
+                                                         THIS_REAL_ELPA_KERNEL_API, useQRFortran, useGPU == 1, bandwidth)
+#endif
+
+    endif
     if (successFortran) then
       success = 1
     else
@@ -668,27 +702,28 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_COMPLEX_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
+  !c> * \param bandwidth            bandwidth of already banded matrix (-1 = No banding)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c> */
 #define DOUBLE_PRECISION_COMPLEX 1
 #ifdef DOUBLE_PRECISION_COMPLEX
-  !c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
+  !c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU, int bandwidth);
 #else
-  !c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
+  !c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU, int bandwidth);
 #endif
 #ifdef DOUBLE_PRECISION_COMPLEX
-  function solve_elpa2_evp_complex_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,               &
+  function solve_elpa2_evp_complex_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,              &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU)                            &
+                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU, bandwidth)                 &
                                   result(success) bind(C,name="elpa_solve_evp_complex_2stage_double_precision")
 #else
-  function solve_elpa2_evp_complex_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,               &
+  function solve_elpa2_evp_complex_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,              &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU)                            &
+                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU, bandwidth)                 &
                                   result(success) bind(C,name="elpa_solve_evp_complex_2stage_single_precision")
 #endif
@@ -699,7 +734,7 @@
     integer(kind=c_int)                    :: success
     integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
                                               mpi_comm_all
-    integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API, useGPU
+    integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API, useGPU, bandwidth
 #ifdef DOUBLE_PRECISION_COMPLEX
     real(kind=c_double)                    :: ev(1:na)
 #ifdef USE_ASSUMED_SIZE
@@ -719,15 +754,32 @@
 #endif
     logical                                :: successFortran
 
+
+    if (bandwidth .eq. -1) then
+      ! matrix is not banded
+
 #ifdef DOUBLE_PRECISION_COMPLEX
-    successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
-                                                          mpi_comm_rows, mpi_comm_cols,                  &
-                                                          mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
+      successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
 #else
-    successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
-                                                          mpi_comm_rows, mpi_comm_cols,                  &
-                                                          mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
+      successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
 #endif
+    else
+      ! matrix is banded
+#ifdef DOUBLE_PRECISION_COMPLEX
+      successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1, bandwidth)
+#else
+      successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1, bandwidth)
+#endif
+
+    endif
     if (successFortran) then
       success = 1
     else
@@ -759,27 +811,28 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_REAL_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
+  !c> * \param bandwidth            bandwidth of already banded matrix (-1 = No banding)
   !c> *
   !c> * \result int: 1 if error occured, otherwise 0
   !c> */
 #undef DOUBLE_PRECISION_COMPLEX
 #ifdef DOUBLE_PRECISION_COMPLEX
-  !c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
+  !c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU, int bandwidth);
 #else
-  !c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU);
+  !c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API, int useGPU, int bandwidth);
 #endif
 #ifdef DOUBLE_PRECISION_COMPLEX
-  function solve_elpa2_evp_complex_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,               &
+  function solve_elpa2_evp_complex_wrapper_double(na, nev, a, lda, ev, q, ldq, nblk,              &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU)                            &
+                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU, bandwidth)                 &
                                   result(success) bind(C,name="elpa_solve_evp_complex_2stage_double_precision")
 #else
-  function solve_elpa2_evp_complex_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,               &
+  function solve_elpa2_evp_complex_wrapper_single(na, nev, a, lda, ev, q, ldq, nblk,              &
                                   matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all,          &
-                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU)                            &
+                                  THIS_COMPLEX_ELPA_KERNEL_API, useGPU, bandwidth)                 &
                                   result(success) bind(C,name="elpa_solve_evp_complex_2stage_single_precision")
 #endif
@@ -790,7 +843,7 @@
     integer(kind=c_int)                    :: success
     integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
                                               mpi_comm_all
-    integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API, useGPU
+    integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API, useGPU, bandwidth
 #ifdef DOUBLE_PRECISION_COMPLEX
     complex(kind=c_double_complex)         :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
     real(kind=c_double)                    :: ev(1:na)
@@ -800,15 +853,33 @@
 #endif
     logical                                :: successFortran
 
+
+    if (bandwidth .eq. -1) then
+      ! matrix is not banded
+
 #ifdef DOUBLE_PRECISION_COMPLEX
-    successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
-                                                          mpi_comm_rows, mpi_comm_cols,                  &
-                                                          mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
+      successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
 #else
-    successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
-                                                          mpi_comm_rows, mpi_comm_cols,                  &
-                                                          mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
+      successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1)
+#endif
+    else
+      ! bandwidth is given
+
+#ifdef DOUBLE_PRECISION_COMPLEX
+      successFortran = elpa_solve_evp_complex_2stage_double(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1, bandwidth)
+#else
+      successFortran = elpa_solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, &
+                                                            mpi_comm_rows, mpi_comm_cols,                  &
+                                                            mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API, useGPU == 1, bandwidth)
 #endif
+
+    endif
     if (successFortran) then
       success = 1
     else
@@ -839,10 +910,10 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_REAL_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \param useQR use QR decomposition 1 = yes, 0 = no
-  !c> * \parmam useGPU use GPU (1=yes, 0=No)
-  !c> * \param method choose whether to use ELPA 1stage or 2stage solver
-  !c> * possible values: "1stage" => use ELPA 1stage solver
+  !c> * \param useQR                use QR decomposition 1 = yes, 0 = no
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
+  !c> * \param method               choose whether to use ELPA 1stage or 2stage solver
+  !c> *                             possible values: "1stage" => use ELPA 1stage solver
   !c> *                                              "2stage" => use ELPA 2stage solver
   !c> *                                              "auto"   => (at the moment) use ELPA 2stage solver
   !c> *
@@ -928,10 +999,10 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_REAL_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \param useQR use QR decomposition 1 = yes, 0 = no
-  !c> * \parmam useGPU use GPU (1=yes, 0=No)
-  !c> * \param method choose whether to use ELPA 1stage or 2stage solver
-  !c> * possible values: "1stage" => use ELPA 1stage solver
+  !c> * \param useQR                use QR decomposition 1 = yes, 0 = no
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
+  !c> * \param method               choose whether to use ELPA 1stage or 2stage solver
+  !c> *                             possible values: "1stage" => use ELPA 1stage solver
   !c> *                                              "2stage" => use ELPA 2stage solver
   !c> *                                              "auto"   => (at the moment) use ELPA 2stage solver
   !c> *
@@ -1017,7 +1088,7 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_COMPLEX_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
   !c> * \param method               choose whether to use ELPA 1stage or 2stage solver
   !c> *                             possible values: "1stage" => use ELPA 1stage solver
   !c> *                                              "2stage" => use ELPA 2stage solver
@@ -1098,7 +1169,7 @@
   !c> * \param mpi_comm_cols        MPI-Communicator for columns
   !c> * \param mpi_coll_all         MPI communicator for the total processor set
   !c> * \param THIS_COMPLEX_ELPA_KERNEL_API  specify used ELPA2 kernel via API
-  !c> * \parmam useGPU              use GPU (1=yes, 0=No)
+  !c> * \param useGPU               use GPU (1=yes, 0=No)
   !c> * \param method               choose whether to use ELPA 1stage or 2stage solver
   !c> *                             possible values: "1stage" => use ELPA 1stage solver
   !c> *                                              "2stage" => use ELPA 2stage solver
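
For illustration only, and not part of the patch: a minimal C-side sketch of how the extended double-precision real entry point could be called. Only the prototype itself comes from the !c> comment above; the header name, the kernel constant, and the way the communicators and distributed matrices are set up are assumptions.

/* Hypothetical caller of the extended interface -- a sketch, not ELPA code.
 * Assumes na, nev, lda, ldq, nblk, matrixCols, the block-cyclically distributed
 * matrices a/q and the row/column communicators were set up beforehand. */
#include <mpi.h>
#include <elpa/elpa.h>        /* assumed header carrying the generated prototype */

int solve_real_2stage(int na, int nev, double *a, int lda, double *ev,
                      double *q, int ldq, int nblk, int matrixCols,
                      MPI_Comm comm_rows, MPI_Comm comm_cols)
{
  /* The C interface takes Fortran communicator handles as plain ints. */
  int mpi_comm_rows = (int) MPI_Comm_c2f(comm_rows);
  int mpi_comm_cols = (int) MPI_Comm_c2f(comm_cols);
  int mpi_comm_all  = (int) MPI_Comm_c2f(MPI_COMM_WORLD);

  int kernel    = 0;   /* must be one of the THIS_REAL_ELPA_KERNEL_API constants from the ELPA headers */
  int useQR     = 0;   /* 1 = yes, 0 = no */
  int useGPU    = 0;   /* 1 = yes, 0 = no */
  int bandwidth = -1;  /* new trailing argument: -1 = matrix is not banded,
                          otherwise the bandwidth of the already banded matrix */

  /* 1 = success, 0 = failure (cf. the man pages above). */
  return elpa_solve_evp_real_2stage_double_precision(na, nev, a, lda, ev, q, ldq,
                                                     nblk, matrixCols,
                                                     mpi_comm_rows, mpi_comm_cols,
                                                     mpi_comm_all, kernel,
                                                     useQR, useGPU, bandwidth);
}

The single-precision and complex entry points take the same trailing bandwidth argument; the complex variants simply omit useQR.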