Commit de6a4fde authored by Andreas Marek's avatar Andreas Marek

Enable single-precision calculations for ELPA1

With the configure option "--enable-single-precision" ELPA1 is built
with single-precision (half-words) only.

The best precision in single-precision (float or complex) is
2^-23 ~ 1.2e-7. The accuracy of the error residual of ELPA1 in
single-precision mode is of the order 1e-4 to 1e-5. The orthogonality of
the EVs is fulfilled up to about ~1e-6.

Thus the precision of ELPA1 in single-precision mode is roughly 100 -
1000 times less than the best achievable precision. This is consistent
with the double-precision mode, where also a factor of 100 - 1000 less
precision than the theoretical best one is found.

The float EVs are identical to the double EVs to at least 1e-2; the
precision of the EVs is thus about 1e-2/1e-7 = 1e5 times lower than the
best theoretical precision. If the same holds for the double precision
calculations, this implies that the double precision results can also
be only trusted on the level 1e-11 (5 orders of magnitude larger
than the best theoretical precision).

The best speed-up compared to the double precision calculation is
a factor of two. This is by far not achieved yet, since the single
precision version is not at all optimized at the moment.
parent c6b56c7e
......@@ -9,7 +9,7 @@ AM_LDFLAGS = $(SCALAPACK_LDFLAGS)
lib_LTLIBRARIES = libelpa@SUFFIX@.la
libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
libelpa@SUFFIX@_la_SOURCES = src/mod_precision.f90 \
libelpa@SUFFIX@_la_SOURCES = src/mod_precision.F90 \
src/elpa_utilities.F90 \
src/elpa1_compute.F90 \
src/elpa1.F90 \
......@@ -310,6 +310,8 @@ elpa2_test_complex_default_kernel.sh:
elpa2_test_complex_choose_kernel_with_api.sh:
echo 'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_complex_choose_kernel_with_api.sh
chmod +x elpa2_test_complex_choose_kernel_with_api.sh
mod_precision.i: $(top_srcdir)/src/mod_precision.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $(top_srcdir)/src/mod_precision.F90 -o $@
elpa2_utilities.i: $(top_srcdir)/src/elpa2_utilities.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $(top_srcdir)/src/elpa2_utilities.F90 -o $@
......@@ -320,6 +322,9 @@ elpa2.i: $(top_srcdir)/src/elpa2.F90
elpa1.i: $(top_srcdir)/src/elpa1.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $(top_srcdir)/src/elpa1.F90 -o $@
elpa1_compute.i: $(top_srcdir)/src/elpa1_compute.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -I$(top_srcdir)/ -c $(top_srcdir)/src/elpa1_compute.F90 -o $@
elpa2_kernels_real.i: $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90
$(CPP) $(CPPFLAGS) -I$(top_builddir)/ -c $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90 -o $@
......
......@@ -509,6 +509,14 @@ if test x"${want_gpu}" = x"yes" ; then
can_compile_gpu=yes
fi
dnl check whether single precision is requested
AC_MSG_CHECKING(whether single precision calculations are requested)
AC_ARG_ENABLE(single-precision,[AS_HELP_STRING([--enable-single-precision],
[build with single precision])],
want_single_precision="yes", want_single_precision="no")
AC_MSG_RESULT([${want_single_precision}])
dnl now check which kernels can be compiled
dnl the checks for SSE were already done before
......@@ -722,10 +730,15 @@ DX_HTML_FEATURE(ON)
DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
DESPERATELY_WANT_ASSUMED_SIZE=0
if text x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
if test x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
AC_DEFINE([DESPERATELY_WANT_ASSUMED_SIZE],[1],[use assumed size arrays, even if not debuggable])
fi
if test x"${want_single_precision}" = x"no" ; then
AC_DEFINE([DOUBLE_PRECISION_REAL],[1],[use double precision for real calculation])
AC_DEFINE([DOUBLE_PRECISION_COMPLEX],[1],[use double precision for complex calculation])
fi
AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
AC_SUBST([with_amd_bulldozer_kernel])
......
......@@ -88,6 +88,7 @@ module ELPA1
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use iso_c_binding
implicit none
PRIVATE ! By default, all routines contained are private
......@@ -104,9 +105,9 @@ module ELPA1
! Timing results, set by every call to solve_evp_xxx
real(kind=rk), public :: time_evp_fwd !< time for forward transformations (to tridiagonal form)
real(kind=rk), public :: time_evp_solve !< time for solving the tridiagonal system
real(kind=rk), public :: time_evp_back !< time for back transformations of eigenvectors
real(kind=c_double), public :: time_evp_fwd !< time for forward transformations (to tridiagonal form)
real(kind=c_double), public :: time_evp_solve !< time for solving the tridiagonal system
real(kind=c_double), public :: time_evp_back !< time for back transformations of eigenvectors
logical, public :: elpa_print_times = .false. !< Set elpa_print_times to .true. for explicit timing outputs
......@@ -294,6 +295,7 @@ function solve_evp_real_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mp
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use iso_c_binding
implicit none
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
......@@ -303,7 +305,7 @@ function solve_evp_real_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mp
integer(kind=ik) :: my_prow, my_pcol, mpierr
real(kind=rk), allocatable :: e(:), tau(:)
real(kind=rk) :: ttt0, ttt1
real(kind=c_double) :: ttt0, ttt1 ! MPI_WTIME always needs double
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
......@@ -395,6 +397,7 @@ function solve_evp_complex_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols,
use timings
#endif
use precision
use iso_c_binding
implicit none
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
......@@ -407,7 +410,7 @@ function solve_evp_complex_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols,
integer(kind=ik) :: l_rows, l_cols, l_cols_nev
real(kind=rk), allocatable :: q_real(:,:), e(:)
complex(kind=ck), allocatable :: tau(:)
real(kind=rk) :: ttt0, ttt1
real(kind=c_double) :: ttt0, ttt1 ! MPI_WTIME always needs double
logical :: success
logical, save :: firstCall = .true.
......
This diff is collapsed.
......@@ -125,19 +125,29 @@
!c> *
!c> * \result int: 1 if error occured, otherwise 0
!c>*/
!c> int elpa_solve_evp_real_1stage(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
#ifdef DOUBLE_PRECISION_REAL
!c> int elpa_solve_evp_real_1stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
#else
!c> int elpa_solve_evp_real_1stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
#endif
function solve_elpa1_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols) &
result(success) bind(C,name="elpa_solve_evp_real_1stage")
#ifdef DOUBLE_PRECISION_REAL
result(success) bind(C,name="elpa_solve_evp_real_1stage_double_precision")
#else
result(success) bind(C,name="elpa_solve_evp_real_1stage_single_precision")
#endif
use, intrinsic :: iso_c_binding
use elpa1, only : solve_evp_real
implicit none
integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows
#ifdef DOUBLE_PRECISION_REAL
real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
#else
real(kind=c_float) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
#endif
logical :: successFortran
successFortran = solve_evp_real(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
......@@ -173,19 +183,32 @@
!c> *
!c> * \result int: 1 if error occured, otherwise 0
!c> */
!c> int elpa_solve_evp_complex_1stage(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
#ifdef DOUBLE_PRECISION_COMPLEX
!c> int elpa_solve_evp_complex_1stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
#else
!c> int elpa_solve_evp_complex_1stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols);
#endif
function solve_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols) &
result(success) bind(C,name="elpa_solve_evp_complex_1stage")
#ifdef DOUBLE_PRECISION_COMPLEX
result(success) bind(C,name="elpa_solve_evp_complex_1stage_double_precision")
#else
result(success) bind(C,name="elpa_solve_evp_complex_1stage_single_precision")
#endif
use, intrinsic :: iso_c_binding
use elpa1, only : solve_evp_complex
implicit none
integer(kind=c_int) :: success
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_double) :: ev(1:na)
#else
complex(kind=c_float_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_float) :: ev(1:na)
#endif
logical :: successFortran
......@@ -223,12 +246,20 @@
!c> *
!c> * \result int: 1 if error occured, otherwise 0
!c> */
!c> int elpa_solve_evp_real_2stage(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
#ifdef DOUBLE_PRECISION_REAL
!c> int elpa_solve_evp_real_2stage_double_precision(int na, int nev, double *a, int lda, double *ev, double *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
#else
!c> int elpa_solve_evp_real_2stage_single_precision(int na, int nev, float *a, int lda, float *ev, float *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_REAL_ELPA_KERNEL_API, int useQR);
#endif
function solve_elpa2_evp_real_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_REAL_ELPA_KERNEL_API, useQR) &
result(success) bind(C,name="elpa_solve_evp_real_2stage")
#ifdef DOUBLE_PRECISION_REAL
result(success) bind(C,name="elpa_solve_evp_real_2stage_double_precision")
#else
result(success) bind(C,name="elpa_solve_evp_real_2stage_single_precision")
#endif
use, intrinsic :: iso_c_binding
use elpa2, only : solve_evp_real_2stage
......@@ -237,9 +268,11 @@
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_REAL_ELPA_KERNEL_API, useQR
#ifdef DOUBLE_PRECISION_REAL
real(kind=c_double) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
#else
real(kind=c_float) :: a(1:lda,1:matrixCols), ev(1:na), q(1:ldq,1:matrixCols)
#endif
logical :: successFortran, useQRFortran
......@@ -287,11 +320,19 @@
!c> *
!c> * \result int: 1 if error occured, otherwise 0
!c> */
!c> int elpa_solve_evp_complex_2stage(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
#ifdef DOUBLE_PRECISION_COMPLEX
!c> int elpa_solve_evp_complex_2stage_double_precision(int na, int nev, double complex *a, int lda, double *ev, double complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
#else
!c> int elpa_solve_evp_complex_2stage_single_precision(int na, int nev, complex *a, int lda, float *ev, complex *q, int ldq, int nblk, int matrixCols, int mpi_comm_rows, int mpi_comm_cols, int mpi_comm_all, int THIS_COMPLEX_ELPA_KERNEL_API);
#endif
function solve_elpa2_evp_complex_wrapper(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
THIS_COMPLEX_ELPA_KERNEL_API) &
result(success) bind(C,name="elpa_solve_evp_complex_2stage")
#ifdef DOUBLE_PRECISION_COMPLEX
result(success) bind(C,name="elpa_solve_evp_complex_2stage_double_precision")
#else
result(success) bind(C,name="elpa_solve_evp_complex_2stage_single_precision")
#endif
use, intrinsic :: iso_c_binding
use elpa2, only : solve_evp_complex_2stage
......@@ -301,8 +342,13 @@
integer(kind=c_int), value, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_cols, mpi_comm_rows, &
mpi_comm_all
integer(kind=c_int), value, intent(in) :: THIS_COMPLEX_ELPA_KERNEL_API
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=c_double_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_double) :: ev(1:na)
#else
complex(kind=c_float_complex) :: a(1:lda,1:matrixCols), q(1:ldq,1:matrixCols)
real(kind=c_float) :: ev(1:na)
#endif
logical :: successFortran
successFortran = solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, &
......
......@@ -49,6 +49,8 @@
! distributed along with the original code in the file "COPYING".
#endif
#include "config-f90.h"
#if REALCASE==1
subroutine elpa_reduce_add_vectors_real(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvr,nvc,nblk)
#endif
......@@ -146,13 +148,25 @@ subroutine elpa_reduce_add_vectors_complex(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t
!$omp master
#endif
#if REALCASE==1
if(k>0) call mpi_reduce(aux1,aux2,k,MPI_REAL8,MPI_SUM,ipt,comm_t,mpierr)
#ifdef DOUBLE_PRECISION_REAL
if(k>0) call mpi_reduce(aux1, aux2, k, MPI_REAL8, MPI_SUM, ipt, comm_t, mpierr)
#else
if(k>0) call mpi_reduce(aux1, aux2, k, MPI_REAL4, MPI_SUM, ipt, comm_t, mpierr)
#endif
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
if(k>0) call mpi_reduce(aux1,aux2,k,MPI_DOUBLE_COMPLEX,MPI_SUM,ipt,comm_t,mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
if(k>0) call mpi_reduce(aux1, aux2, k, MPI_DOUBLE_COMPLEX, MPI_SUM, ipt, comm_t, mpierr)
#else
if(k>0) call mpi_reduce(aux1, aux2, k, MPI_COMPLEX, MPI_SUM, ipt, comm_t, mpierr)
#endif
#endif /* COMPLEXCASE == 1 */
#ifdef WITH_OPENMP
!$omp end master
!$omp barrier
......
......@@ -49,6 +49,8 @@
! distributed along with the original code in the file "COPYING".
#endif
#include "config-f90.h"
#if REALCASE==1
subroutine elpa_transpose_vectors_real(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvs,nvr,nvc,nblk)
#endif
......@@ -155,12 +157,25 @@ subroutine elpa_transpose_vectors_complex(vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,
!$omp master
#endif
#if COMPLEXCASE==1
call MPI_Bcast(aux,nblks_comm*nblk*nvc,MPI_DOUBLE_COMPLEX,ips,comm_s,mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Bcast(aux, nblks_comm*nblk*nvc, MPI_DOUBLE_COMPLEX, ips, comm_s, mpierr)
#else
call MPI_Bcast(aux, nblks_comm*nblk*nvc, MPI_COMPLEX, ips, comm_s, mpierr)
#endif
#endif /* COMPLEXCASE == 1 */
#if REALCASE==1
call MPI_Bcast(aux,nblks_comm*nblk*nvc,MPI_REAL8,ips,comm_s,mpierr)
#ifdef DOUBLE_PRECISION_REAL
call MPI_Bcast(aux, nblks_comm*nblk*nvc, MPI_REAL8, ips, comm_s, mpierr)
#else
call MPI_Bcast(aux, nblks_comm*nblk*nvc, MPI_REAL4, ips, comm_s, mpierr)
#endif
#endif /* REALCASE == 1 */
#ifdef WITH_OPENMP
!$omp end master
!$omp barrier
......
#include "config-f90.h"
!> Kind parameters used throughout the library.
!>
!> The working precision is selected at preprocessing time:
!>   - DOUBLE_PRECISION_REAL    defined -> rk = C_DOUBLE, otherwise rk = C_FLOAT
!>   - DOUBLE_PRECISION_COMPLEX defined -> ck = C_DOUBLE, otherwise ck = C_FLOAT
!>
!> rk is the kind for real variables; ck is used as the kind of complex
!> variables (complex(kind=ck)), i.e. it is the kind of each component.
!> ik and lik are the 32-bit and 64-bit C-interoperable integer kinds.
module precision
  use, intrinsic :: iso_c_binding, only : C_FLOAT, C_DOUBLE, C_INT32_T, C_INT64_T
  implicit none

  ! Kind of real variables
#ifdef DOUBLE_PRECISION_REAL
  integer, parameter :: rk  = C_DOUBLE
#else
  integer, parameter :: rk  = C_FLOAT
#endif

  ! Kind of complex variables (kind of the real/imaginary components)
#ifdef DOUBLE_PRECISION_COMPLEX
  integer, parameter :: ck  = C_DOUBLE
#else
  integer, parameter :: ck  = C_FLOAT
#endif

  ! Integer kinds: default (32 bit) and long (64 bit)
  integer, parameter :: ik  = C_INT32_T
  integer, parameter :: lik = C_INT64_T
end module precision
......@@ -50,6 +50,9 @@
#endif
! --------------------------------------------------------------------------------------------------
! redist_band: redistributes band from 2D block cyclic form to 1D band
#include "config-f90.h"
#if REALCASE==1
subroutine redist_band_real(r_a, lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm, r_ab)
#endif
......@@ -248,12 +251,25 @@ subroutine redist_band_complex(c_a, lda, na, nblk, nbw, matrixCols, mpi_comm_row
! Exchange all data with MPI_Alltoallv
#if REALCASE==1
call MPI_Alltoallv(r_sbuf,ncnt_s,nstart_s,MPI_REAL8,r_rbuf,ncnt_r,nstart_r,MPI_REAL8,mpi_comm,mpierr)
#ifdef DOUBLE_PRECISION_REAL
call MPI_Alltoallv(r_sbuf, ncnt_s, nstart_s, MPI_REAL8, r_rbuf, ncnt_r, nstart_r, MPI_REAL8, mpi_comm, mpierr)
#else
call MPI_Alltoallv(r_sbuf, ncnt_s, nstart_s, MPI_REAL4, r_rbuf, ncnt_r, nstart_r, MPI_REAL4, mpi_comm, mpierr)
#endif
#endif /* REALCASE==1 */
#if COMPLEXCASE==1
call MPI_Alltoallv(c_sbuf,ncnt_s,nstart_s,MPI_COMPLEX16,c_rbuf,ncnt_r,nstart_r,MPI_COMPLEX16,mpi_comm,mpierr)
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Alltoallv(c_sbuf, ncnt_s, nstart_s, MPI_COMPLEX16, c_rbuf, ncnt_r, nstart_r, MPI_COMPLEX16, mpi_comm, mpierr)
#else
call MPI_Alltoallv(c_sbuf, ncnt_s, nstart_s, MPI_COMPLEX, c_rbuf, ncnt_r, nstart_r, MPI_COMPLEX, mpi_comm, mpierr)
#endif
#endif /* COMPLEXCASE==1 */
! set band from receive buffer
ncnt_r(:) = ncnt_r(:)/(nblk*nblk)
......
......@@ -72,11 +72,19 @@ int main(int argc, char** argv) {
int info, *sc_desc;
int na_rows, na_cols;
double startVal;
#ifdef DOUBLE_PRECISION_COMPLEX
complex double *a, *z, *as, *tmp1, *tmp2;
double *ev, *xr;
#else
complex *a, *z, *as, *tmp1, *tmp2;
float *ev, *xr;
#endif
int *iseed;
......@@ -103,6 +111,12 @@ int main(int argc, char** argv) {
printf("\n");
#ifdef DOUBLE_PRECISION_COMPLEX
printf("The double precision version of ELPA1 is used\n");
#else
printf("The single precision version of ELPA1 is used\n");
#endif
printf("\n");
}
status = 0;
......@@ -161,6 +175,7 @@ int main(int argc, char** argv) {
printf("\n");
}
#ifdef DOUBLE_PRECISION_COMPLEX
a = malloc(na_rows*na_cols*sizeof(complex double));
z = malloc(na_rows*na_cols*sizeof(complex double));
as = malloc(na_rows*na_cols*sizeof(complex double));
......@@ -172,10 +187,26 @@ int main(int argc, char** argv) {
tmp1 = malloc(na_rows*na_cols*sizeof(complex double));
tmp2 = malloc(na_rows*na_cols*sizeof(complex double));
#else
a = malloc(na_rows*na_cols*sizeof(complex));
z = malloc(na_rows*na_cols*sizeof(complex));
as = malloc(na_rows*na_cols*sizeof(complex));
iseed = malloc(4096*sizeof(int));
xr = malloc(na_rows*na_cols*sizeof(float));
prepare_matrix_complex_from_fortran(na, myid, na_rows, na_cols, sc_desc, iseed, xr, a, z, as);
ev = malloc(na*sizeof(float));
tmp1 = malloc(na_rows*na_cols*sizeof(complex));
tmp2 = malloc(na_rows*na_cols*sizeof(complex));
#endif
iseed = malloc(4096*sizeof(int));
#ifdef DOUBLE_PRECISION_COMPLEX
prepare_matrix_complex_from_fortran_double_precision(na, myid, na_rows, na_cols, sc_desc, iseed, xr, a, z, as);
#else
prepare_matrix_complex_from_fortran_single_precision(na, myid, na_rows, na_cols, sc_desc, iseed, xr, a, z, as);
#endif
free(xr);
......@@ -187,8 +218,11 @@ int main(int argc, char** argv) {
mpierr = MPI_Barrier(MPI_COMM_WORLD);
success = elpa_solve_evp_complex_1stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
#ifdef DOUBLE_PRECISION_COMPLEX
success = elpa_solve_evp_complex_1stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
#else
success = elpa_solve_evp_complex_1stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
#endif
if (success != 1) {
printf("error in ELPA solve \n");
mpierr = MPI_Abort(MPI_COMM_WORLD, 99);
......@@ -202,7 +236,11 @@ int main(int argc, char** argv) {
}
/* check the results */
status = check_correctness_complex_from_fortran(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#ifdef DOUBLE_PRECISION_COMPLEX
status = check_correctness_complex_from_fortran_double_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#else
status = check_correctness_complex_from_fortran_single_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#endif
if (status !=0){
printf("The computed EVs are not correct !\n");
......
......@@ -73,9 +73,11 @@ int main(int argc, char** argv) {
int na_rows, na_cols;
double startVal;
#ifdef DOUBLE_PRECISION_REAL
double *a, *z, *as, *ev, *tmp1, *tmp2;
#else
float *a, *z, *as, *ev, *tmp1, *tmp2;
#endif
int *iseed;
int success;
......@@ -99,6 +101,12 @@ int main(int argc, char** argv) {
printf("as it's Fortran counterpart. It's only purpose is to show how \n");
printf("to evoke ELPA1 from a c programm\n");
printf("\n");
#ifdef DOUBLE_PRECISION_REAL
printf("The double precision version of ELPA1 is used\n");
#else
printf("The single precision version of ELPA1 is used\n");
#endif
printf("\n");
}
......@@ -157,7 +165,7 @@ int main(int argc, char** argv) {
printf("Allocating matrices with na_rows=%d and na_cols=%d\n",na_rows, na_cols);
printf("\n");
}
#ifdef DOUBLE_PRECISION_REAL
a = malloc(na_rows*na_cols*sizeof(double));
z = malloc(na_rows*na_cols*sizeof(double));
as = malloc(na_rows*na_cols*sizeof(double));
......@@ -167,11 +175,24 @@ int main(int argc, char** argv) {
tmp1 = malloc(na_rows*na_cols*sizeof(double));
tmp2 = malloc(na_rows*na_cols*sizeof(double));
#else
a = malloc(na_rows*na_cols*sizeof(float));
z = malloc(na_rows*na_cols*sizeof(float));
as = malloc(na_rows*na_cols*sizeof(float));
iseed = malloc(4096*sizeof(int));
prepare_matrix_real_from_fortran(na, myid, na_rows, na_cols, sc_desc, iseed, a, z, as);
ev = malloc(na*sizeof(float));
tmp1 = malloc(na_rows*na_cols*sizeof(float));
tmp2 = malloc(na_rows*na_cols*sizeof(float));
#endif
iseed = malloc(4096*sizeof(int));
#ifdef DOUBLE_PRECISION_REAL
prepare_matrix_real_from_fortran_double_precision(na, myid, na_rows, na_cols, sc_desc, iseed, a, z, as);
#else
prepare_matrix_real_from_fortran_single_precision(na, myid, na_rows, na_cols, sc_desc, iseed, a, z, as);
#endif
if (myid == 0) {
printf("\n");
printf("Entering ELPA 1stage real solver\n");
......@@ -179,9 +200,11 @@ int main(int argc, char** argv) {
}
mpierr = MPI_Barrier(MPI_COMM_WORLD);
success = elpa_solve_evp_real_1stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
#ifdef DOUBLE_PRECISION_REAL
success = elpa_solve_evp_real_1stage_double_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
#else
success = elpa_solve_evp_real_1stage_single_precision(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, mpi_comm_rows, mpi_comm_cols);
#endif
if (success != 1) {
printf("error in ELPA solve \n");
mpierr = MPI_Abort(MPI_COMM_WORLD, 99);
......@@ -194,9 +217,12 @@ int main(int argc, char** argv) {
printf("\n");
}
#ifdef DOUBLE_PRECISION_REAL
/* check the results */
status = check_correctness_real_from_fortran(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
status = check_correctness_real_from_fortran_double_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#else
status = check_correctness_real_from_fortran_single_precision(na, nev, na_rows, na_cols, as, z, ev, sc_desc, myid, tmp1, tmp2);
#endif
if (status !=0){
printf("The computed EVs are not correct !\n");
}
......
......@@ -140,6 +140,22 @@ program test_complex
endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA1 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA1 is used"
print *," "
endif
#endif
call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
#ifdef HAVE_REDIRECT
if (check_redirect_environment_variable()) then
if (myid .eq. 0) then
......
......@@ -178,6 +178,21 @@ program test_real
print *," "
endif
#endif
#ifdef DOUBLE_PRECISION_REAL
if (myid .eq. 0) then
print *," "
print *,"Double precision version of ELPA1 is used"
print *," "
endif
#else
if (myid .eq. 0) then
print *," "
print *,"Single precision version of ELPA1 is used"
print *," "
endif
#endif
call MPI_BARRIER(MPI_COMM_WORLD, mpierr)
#ifdef HAVE_REDIRECT
......
......@@ -40,19 +40,32 @@
/* the original distribution, the GNU Lesser General Public License. */
/* */
/* */
#include "config-f90.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <elpa/elpa.h>