Abort if the wrong kernel is chosen in single-precision mode

At the moment, only the generic kernels are available in
single precision. The SSE and AVX kernels still have to be ported.
parent 8af19130
......@@ -299,6 +299,16 @@ contains
cudaHostRegisterMapped = cuda_hostRegisterMapped()
endif
! Temporary check until single precision works with all kernels.
! In single-precision builds, abort unless the selected real kernel is one
! of the kernels accepted below (GENERIC, GENERIC_SIMPLE or GPU).
! The inequality tests have to be combined with .and. here: joined with
! .or. the condition holds for every kernel value (assuming the kernel
! constants are distinct), so the program would stop unconditionally.
#ifndef DOUBLE_PRECISION_REAL
if ( (THIS_REAL_ELPA_KERNEL .ne. REAL_ELPA_KERNEL_GENERIC) .and. &
     (THIS_REAL_ELPA_KERNEL .ne. REAL_ELPA_KERNEL_GENERIC_SIMPLE) .and. &
     (THIS_REAL_ELPA_KERNEL .ne. REAL_ELPA_KERNEL_GPU) ) then
  print *,"At the moment single precision only works with the generic kernels"
  stop
endif
#endif
! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
! On older systems (IBM Bluegene/P, Intel Nehalem) a value of 32 was optimal.
! For Intel(R) Xeon(R) E5 v2 and v3, better use 64 instead of 32!
......@@ -630,7 +640,15 @@ contains
print *,"At the moment GPU version needs blocksize 128"
stop
endif
! Temporary check until single precision works with all kernels.
! In single-precision builds, abort unless the selected real kernel is one
! of the kernels accepted below (GENERIC, GENERIC_SIMPLE or GPU).
! The inequality tests have to be combined with .and. here: joined with
! .or. the condition holds for every kernel value (assuming the kernel
! constants are distinct), so the program would stop unconditionally.
#ifndef DOUBLE_PRECISION_REAL
if ( (THIS_REAL_ELPA_KERNEL .ne. REAL_ELPA_KERNEL_GENERIC) .and. &
     (THIS_REAL_ELPA_KERNEL .ne. REAL_ELPA_KERNEL_GENERIC_SIMPLE) .and. &
     (THIS_REAL_ELPA_KERNEL .ne. REAL_ELPA_KERNEL_GPU) ) then
  print *,"At the moment single precision only works with the generic kernels"
  stop
endif
#endif
! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
......@@ -925,7 +943,13 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
endif
THIS_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC
endif
#ifndef DOUBLE_PRECISION_COMPLEX
! Temporary check: in single-precision builds, abort unless the selected
! complex kernel is GENERIC or GENERIC_SIMPLE.
! The two inequality tests have to be combined with .and. here: joined
! with .or. the condition holds for every kernel value (assuming the two
! constants are distinct), so the program would stop unconditionally.
! NOTE(review): COMPLEX_ELPA_KERNEL_GPU is not accepted by this check, so
! the GPU branch that follows cannot be reached in single precision.
! Confirm whether the GPU kernel should be allowed as well.
if ( (THIS_COMPLEX_ELPA_KERNEL .ne. COMPLEX_ELPA_KERNEL_GENERIC) .and. &
     (THIS_COMPLEX_ELPA_KERNEL .ne. COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE) ) then
  print *,"At the moment single precision only works with the generic kernels"
  stop
endif
#endif
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_GPU) then
if (check_for_gpu(my_pe, numberOfGPUDevices, wantDebug=wantDebug)) then
useGPU=.true.
......@@ -1241,7 +1265,13 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
endif
THIS_COMPLEX_ELPA_KERNEL = COMPLEX_ELPA_KERNEL_GENERIC
endif
#ifndef DOUBLE_PRECISION_COMPLEX
! Temporary check: in single-precision builds, abort unless the selected
! complex kernel is GENERIC or GENERIC_SIMPLE.
! The two inequality tests have to be combined with .and. here: joined
! with .or. the condition holds for every kernel value (assuming the two
! constants are distinct), so the program would stop unconditionally.
! NOTE(review): COMPLEX_ELPA_KERNEL_GPU is not accepted by this check, so
! the GPU branch that follows cannot be reached in single precision.
! Confirm whether the GPU kernel should be allowed as well.
if ( (THIS_COMPLEX_ELPA_KERNEL .ne. COMPLEX_ELPA_KERNEL_GENERIC) .and. &
     (THIS_COMPLEX_ELPA_KERNEL .ne. COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE) ) then
  print *,"At the moment single precision only works with the generic kernels"
  stop
endif
#endif
if (THIS_COMPLEX_ELPA_KERNEL .eq. COMPLEX_ELPA_KERNEL_GPU) then
if (check_for_gpu(my_pe, numberOfGPUDevices, wantDebug=wantDebug)) then
useGPU=.true.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment