Commit 889616ed authored by Andreas Marek
Browse files

Further cleanup of precision_macros.h

parent a2b5931c
......@@ -526,16 +526,13 @@
tau(istep) = v_row(l_rows+1)
! Transpose Householder vector v_row -> v_col
#if REALCASE == 1
call elpa_transpose_vectors_real_PRECISION (v_row, ubound(v_row,dim=1), mpi_comm_rows, &
v_col, ubound(v_col,dim=1), mpi_comm_cols, &
1, istep-1, 1, nblk)
#endif
#if COMPLEXCASE == 1
call elpa_transpose_vectors_complex_PRECISION (v_row, ubound(v_row,dim=1), mpi_comm_rows, &
v_col, ubound(v_col,dim=1), mpi_comm_cols, &
1, istep-1, 1, nblk)
#endif
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(v_row, ubound(v_row,dim=1), mpi_comm_rows, v_col, ubound(v_col,dim=1), mpi_comm_cols, &
1, istep-1, 1, nblk)
! Calculate u = (A + VU**T + UV**T)*v
! For cache efficiency, we use only the upper half of the matrix tiles for this,
......
......@@ -1066,17 +1066,29 @@
! Transpose vmr -> vmc (stored in umc, second half)
#if REALCASE == 1
if (useGPU) then
call elpa_transpose_vectors_real_PRECISION (vmrCUDA, cur_l_rows, mpi_comm_rows, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(vmrCUDA, cur_l_rows, mpi_comm_rows, &
umcCUDA(cur_l_cols * n_cols + 1), cur_l_cols, mpi_comm_cols, &
1, istep*nbw, n_cols, nblk)
else
call elpa_transpose_vectors_real_PRECISION (vmrCPU, ubound(vmrCPU,dim=1), mpi_comm_rows, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(vmrCPU, ubound(vmrCPU,dim=1), mpi_comm_rows, &
umcCPU(1,n_cols+1), ubound(umcCPU,dim=1), mpi_comm_cols, &
1, istep*nbw, n_cols, nblk)
endif
#endif
#if COMPLEXCASE == 1
call elpa_transpose_vectors_complex_PRECISION (vmrCPU, ubound(vmrCPU,dim=1), mpi_comm_rows, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(vmrCPU, ubound(vmrCPU,dim=1), mpi_comm_rows, &
umcCPU(1,n_cols+1), ubound(umcCPU,dim=1), mpi_comm_cols, &
1, istep*nbw, n_cols, nblk)
#endif
......@@ -1317,7 +1329,7 @@
stop
endif
endif ! useGPU
endif ! (l_cols>0 .and. l_rows>0)
endif ! (l_cols>0 .and. l_rows>0)
#endif /* COMPLEXCASE == 1 */
! Sum up all ur(:) parts along rows and add them to the uc(:) parts
......@@ -1330,7 +1342,11 @@
! here the GPU version and CPU version diverged for the same reasons as above
if (tile_size < istep*nbw) then
call elpa_reduce_add_vectors_real_PRECISION (vmrCUDA(cur_l_rows * n_cols + 1),cur_l_rows,mpi_comm_rows, &
call elpa_reduce_add_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(vmrCUDA(cur_l_rows * n_cols + 1),cur_l_rows,mpi_comm_rows, &
umcCUDA, cur_l_cols, mpi_comm_cols, &
istep*nbw, n_cols, nblk)
endif
......@@ -1425,7 +1441,11 @@
endif
! Transpose umc -> umr (stored in vmr, second half)
call elpa_transpose_vectors_real_PRECISION (umcCUDA, cur_l_cols, mpi_comm_cols, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(umcCUDA, cur_l_cols, mpi_comm_cols, &
vmrCUDA(cur_l_rows * n_cols + 1), cur_l_rows, mpi_comm_rows, &
1, istep*nbw, n_cols, nblk)
......@@ -1460,7 +1480,11 @@
! Or if we used the Algorithm 4
if (tile_size < istep*nbw .or. n_way > 1) then
call elpa_reduce_add_vectors_real_PRECISION (vmrCPU(1,n_cols+1),ubound(vmrCPU,dim=1),mpi_comm_rows, &
call elpa_reduce_add_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(vmrCPU(1,n_cols+1),ubound(vmrCPU,dim=1),mpi_comm_rows, &
umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
istep*nbw, n_cols, nblk)
endif
......@@ -1510,7 +1534,11 @@
ubound(vav,dim=1), CONST_1_0, umcCPU, ubound(umcCPU,dim=1))
call timer%stop("blas")
! Transpose umc -> umr (stored in vmr, second half)
call elpa_transpose_vectors_real_PRECISION(umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1), mpi_comm_rows, &
1, istep*nbw, n_cols, nblk)
......@@ -1572,7 +1600,11 @@
#if COMPLEXCASE == 1
if (tile_size < istep*nbw) then
call elpa_reduce_add_vectors_complex_PRECISION (vmrCPU(1,n_cols+1),ubound(vmrCPU,dim=1),mpi_comm_rows, &
call elpa_reduce_add_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(vmrCPU(1,n_cols+1),ubound(vmrCPU,dim=1),mpi_comm_rows, &
umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
istep*nbw, n_cols, nblk)
endif
......@@ -1699,7 +1731,11 @@
print *, "bandred_complex: cuda memcpy failed umcCPU ", istat
stop
endif
call elpa_transpose_vectors_complex_PRECISION (umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1), mpi_comm_rows, &
1, istep*nbw, n_cols, nblk)
if (size(vmrCPU,dim=1)*size(vmrCPU,dim=2) .gt. vmr_size) then
......@@ -1727,8 +1763,11 @@
vav, ubound(vav,dim=1), CONE, umcCPU, ubound(umcCPU,dim=1))
call timer%stop("blas")
! Transpose umc -> umr (stored in vmr, second half)
call elpa_transpose_vectors_complex_PRECISION (umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(umcCPU, ubound(umcCPU,dim=1), mpi_comm_cols, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1), mpi_comm_rows, &
1, istep*nbw, n_cols, nblk)
endif
......
......@@ -73,7 +73,9 @@
subroutine band_band_real_PRECISION(na, nb, nbCol, nb2, nb2Col, ab, ab2, d, e, mpi_comm)
subroutine band_band_real_&
&PRECISION &
(na, nb, nbCol, nb2, nb2Col, ab, ab2, d, e, mpi_comm)
!-------------------------------------------------------------------------------
! band_band_real:
! Reduces a real symmetric banded matrix to a real symmetric matrix with smaller bandwidth. Householder transformations are not stored.
......
......@@ -659,14 +659,13 @@
!$omp parallel do private(my_thread), schedule(static, 1)
do my_thread = 1, max_threads
#if REALCASE == 1
call unpack_row_real_cpu_openmp_PRECISION(aIntern, row, i-limits(ip), my_thread, stripe_count, &
thread_width, stripe_width, l_nev)
#endif
#if COMPLEXCASE == 1
call unpack_row_complex_cpu_openmp_PRECISION(aIntern, row,i-limits(ip),my_thread, &
stripe_count, thread_width, stripe_width, l_nev)
#endif
call unpack_row_&
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION &
(aIntern, row, i-limits(ip), my_thread, stripe_count, &
thread_width, stripe_width, l_nev)
enddo
!$omp end parallel do
......@@ -719,12 +718,11 @@
#endif /* WITH_MPI */
#if REALCASE == 1
call unpack_row_real_cpu_PRECISION(aIntern, row,i-limits(ip), stripe_count, stripe_width, last_stripe_width)
#endif
#if COMPLEXCASE == 1
call unpack_row_complex_cpu_PRECISION(aIntern, row,i-limits(ip), stripe_count, stripe_width, last_stripe_width)
#endif
call unpack_row_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION &
(aIntern, row,i-limits(ip), stripe_count, stripe_width, last_stripe_width)
endif ! useGPU
#endif /* WITH_OPENMP */
......@@ -763,14 +761,12 @@
!$omp parallel do private(my_thread), schedule(static, 1)
do my_thread = 1, max_threads
#if REALCASE == 1
call unpack_row_real_cpu_openmp_PRECISION(aIntern, row, i-limits(ip), my_thread, &
stripe_count, thread_width, stripe_width, l_nev)
#endif
#if COMPLEXCASE == 1
call unpack_row_complex_cpu_openmp_PRECISION(aIntern, row,i-limits(ip),my_thread, &
stripe_count, thread_width, stripe_width, l_nev)
#endif
call unpack_row_&
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION &
(aIntern, row, i-limits(ip), my_thread, stripe_count, thread_width, stripe_width, l_nev)
enddo
!$omp end parallel do
......@@ -781,12 +777,11 @@
if (useGPU) then
else
#if REALCASE == 1
call unpack_row_real_cpu_PRECISION(aIntern, row,i-limits(ip), stripe_count, stripe_width, last_stripe_width)
#endif
#if COMPLEXCASE == 1
call unpack_row_complex_cpu_PRECISION(aIntern, row,i-limits(ip), stripe_count, stripe_width, last_stripe_width)
#endif
call unpack_row_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION &
(aIntern, row,i-limits(ip), stripe_count, stripe_width, last_stripe_width)
endif
#endif /* WITH_OPENMP */
......@@ -867,14 +862,11 @@
call timer%start("OpenMP parallel" // PRECISION_SUFFIX)
!$omp parallel do private(my_thread), schedule(static, 1)
do my_thread = 1, max_threads
#if REALCASE == 1
call unpack_row_real_cpu_openmp_PRECISION(aIntern, row, i-limits(my_prow), my_thread, &
stripe_count, thread_width, stripe_width, l_nev)
#endif
#if COMPLEXCASE == 1
call unpack_row_complex_cpu_openmp_PRECISION(aIntern, row,i-limits(my_prow),my_thread, &
stripe_count, thread_width, stripe_width, l_nev)
#endif
call unpack_row_&
&MATH_DATATYPE&
&_cpu_openmp_&
&PRECISION &
(aIntern, row, i-limits(my_prow), my_thread, stripe_count, thread_width, stripe_width, l_nev)
enddo
!$omp end parallel do
call timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
......@@ -934,13 +926,11 @@
! row(1:l_nev) = row(1:l_nev)
#endif
#if REALCASE == 1
call unpack_row_real_cpu_PRECISION(aIntern, row,i-limits(my_prow), stripe_count, stripe_width, last_stripe_width)
#endif
#if COMPLEXCASE == 1
call unpack_row_complex_cpu_PRECISION(aIntern, row,i-limits(my_prow), stripe_count, stripe_width, last_stripe_width)
#endif
call unpack_row_&
&MATH_DATATYPE&
&_cpu_&
&PRECISION &
(aIntern, row,i-limits(my_prow), stripe_count, stripe_width, last_stripe_width)
endif ! useGPU
#endif /* WITH_OPENMP */
......
......@@ -3,22 +3,6 @@
#define MATH_DATATYPE real
#undef PRECISION
#undef PRECISION_STR
#undef elpa_transpose_vectors_NUMBER_PRECISION
#undef elpa_transpose_vectors_NUMBER_PRECISION_STR
#undef elpa_transpose_vectors_real_PRECISION
#undef elpa_reduce_add_vectors_NUMBER_PRECISION
#undef elpa_reduce_add_vectors_NUMBER_PRECISION_STR
#undef elpa_reduce_add_vectors_real_PRECISION
#undef band_band_NUMBER_PRECISION
#undef band_band_NUMBER_PRECISION_STR
#undef band_band_real_PRECISION
#undef solve_tridi_col_PRECISION
#undef solve_tridi_col_PRECISION_STR
#undef solve_tridi_single_problem_PRECISION
#undef solve_tridi_single_problem_PRECISION_STR
#undef solve_evp_NUMBER_2stage_PRECISION
#undef solve_evp_NUMBER_2stage_PRECISION_STR
#undef solve_evp_real_2stage_PRECISION
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef qr_pdgeqrf_2dcomm_PRECISION_STR
#undef hh_transform_NUMBER_PRECISION
......@@ -31,12 +15,6 @@
#undef redist_band_NUMBER_PRECISION
#undef redist_band_NUMBER_PRECISION_STR
#undef redist_band_real_PRECISION
#undef unpack_row_NUMBER_cpu_PRECISION
#undef unpack_row_NUMBER_cpu_PRECISION_STR
#undef unpack_row_real_cpu_PRECISION
#undef unpack_row_NUMBER_cpu_openmp_PRECISION
#undef unpack_row_NUMBER_cpu_openmp_PRECISION_STR
#undef unpack_row_real_cpu_openmp_PRECISION
#undef unpack_and_prepare_row_group_NUMBER_gpu_PRECISION
#undef unpack_and_prepare_row_group_NUMBER_gpu_PRECISION_STR
#undef unpack_and_prepare_row_group_real_gpu_PRECISION
......@@ -144,22 +122,6 @@
#ifdef DOUBLE_PRECISION
#define PRECISION double
#define PRECISION_STR 'double'
#define elpa_transpose_vectors_NUMBER_PRECISION elpa_transpose_vectors_real_double
#define elpa_transpose_vectors_NUMBER_PRECISION_STR 'elpa_transpose_vectors_real_double'
#define elpa_transpose_vectors_real_PRECISION elpa_transpose_vectors_real_double
#define elpa_reduce_add_vectors_NUMBER_PRECISION elpa_reduce_add_vectors_real_double
#define elpa_reduce_add_vectors_NUMBER_PRECISION_STR 'elpa_reduce_add_vectors_real_double'
#define elpa_reduce_add_vectors_real_PRECISION elpa_reduce_add_vectors_real_double
#define band_band_NUMBER_PRECISION band_band_real_double
#define band_band_NUMBER_PRECISION_STR 'band_band_real_double'
#define band_band_real_PRECISION band_band_real_double
#define solve_tridi_col_PRECISION solve_tridi_col_double
#define solve_tridi_col_PRECISION_STR 'solve_tridi_col_double'
#define solve_tridi_single_problem_PRECISION solve_tridi_single_problem_double
#define solve_tridi_single_problem_PRECISION_STR 'solve_tridi_single_problem_double'
#define solve_evp_NUMBER_2stage_PRECISION solve_evp_real_2stage_double
#define solve_evp_NUMBER_2stage_PRECISION_STR 'solve_evp_real_2stage_double'
#define solve_evp_real_2stage_PRECISION solve_evp_real_2stage_double
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_double'
#define hh_transform_NUMBER_PRECISION hh_transform_real_double
......@@ -172,12 +134,6 @@
#define redist_band_NUMBER_PRECISION redist_band_real_double
#define redist_band_NUMBER_PRECISION_STR 'redist_band_real_double'
#define redist_band_real_PRECISION redist_band_real_double
#define unpack_row_NUMBER_cpu_PRECISION unpack_row_real_cpu_double
#define unpack_row_NUMBER_cpu_PRECISION_STR 'unpack_row_real_cpu_double'
#define unpack_row_real_cpu_PRECISION unpack_row_real_cpu_double
#define unpack_row_NUMBER_cpu_openmp_PRECISION unpack_row_real_cpu_openmp_double
#define unpack_row_NUMBER_cpu_openmp_PRECISION_STR 'unpack_row_real_cpu_openmp_double'
#define unpack_row_real_cpu_openmp_PRECISION unpack_row_real_cpu_openmp_double
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION unpack_and_prepare_row_group_real_gpu_double
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION_STR 'unpack_and_prepare_row_group_real_gpu_double'
#define unpack_and_prepare_row_group_real_gpu_PRECISION unpack_and_prepare_row_group_real_gpu_double
......@@ -286,22 +242,6 @@
#ifdef SINGLE_PRECISION
#define PRECISION single
#define PRECISION_STR 'single'
#define elpa_transpose_vectors_NUMBER_PRECISION elpa_transpose_vectors_real_single
#define elpa_transpose_vectors_NUMBER_PRECISION_STR 'elpa_transpose_vectors_real_single'
#define elpa_transpose_vectors_real_PRECISION elpa_transpose_vectors_real_single
#define elpa_reduce_add_vectors_NUMBER_PRECISION elpa_reduce_add_vectors_real_single
#define elpa_reduce_add_vectors_NUMBER_PRECISION_STR 'elpa_reduce_add_vectors_real_single'
#define elpa_reduce_add_vectors_real_PRECISION elpa_reduce_add_vectors_real_single
#define band_band_NUMBER_PRECISION band_band_real_single
#define band_band_NUMBER_PRECISION_STR 'band_band_real_single'
#define band_band_real_PRECISION band_band_real_single
#define solve_tridi_col_PRECISION solve_tridi_col_single
#define solve_tridi_col_PRECISION_STR 'solve_tridi_col_single'
#define solve_tridi_single_problem_PRECISION solve_tridi_single_problem_single
#define solve_tridi_single_problem_PRECISION_STR 'solve_tridi_single_problem_single'
#define solve_evp_NUMBER_2stage_PRECISION solve_evp_real_2stage_single
#define solve_evp_NUMBER_2stage_PRECISION_STR 'solve_evp_real_2stage_single'
#define solve_evp_real_2stage_PRECISION solve_evp_real_2stage_single
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_single'
#define hh_transform_NUMBER_PRECISION hh_transform_real_single
......@@ -314,12 +254,6 @@
#define redist_band_NUMBER_PRECISION redist_band_real_single
#define redist_band_NUMBER_PRECISION_STR 'redist_band_real_single'
#define redist_band_real_PRECISION redist_band_real_single
#define unpack_row_NUMBER_cpu_PRECISION unpack_row_real_cpu_single
#define unpack_row_NUMBER_cpu_PRECISION_STR 'unpack_row_real_cpu_single'
#define unpack_row_real_cpu_PRECISION unpack_row_real_cpu_single
#define unpack_row_NUMBER_cpu_openmp_PRECISION unpack_row_real_cpu_openmp_single
#define unpack_row_NUMBER_cpu_openmp_PRECISION_STR 'unpack_row_real_cpu_openmp_single'
#define unpack_row_real_cpu_openmp_PRECISION unpack_row_real_cpu_openmp_single
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION unpack_and_prepare_row_group_real_gpu_single
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION_STR 'unpack_and_prepare_row_group_real_gpu_single'
#define unpack_and_prepare_row_group_real_gpu_PRECISION unpack_and_prepare_row_group_real_gpu_single
......@@ -431,22 +365,6 @@
#define MATH_DATATYPE complex
#undef PRECISION
#undef PRECISION_STR
#undef elpa_transpose_vectors_NUMBER_PRECISION
#undef elpa_transpose_vectors_NUMBER_PRECISION_STR
#undef elpa_transpose_vectors_complex_PRECISION
#undef elpa_reduce_add_vectors_NUMBER_PRECISION
#undef elpa_reduce_add_vectors_NUMBER_PRECISION_STR
#undef elpa_reduce_add_vectors_complex_PRECISION
#undef band_band_NUMBER_PRECISION
#undef band_band_NUMBER_PRECISION_STR
#undef band_band_complex_PRECISION
#undef solve_tridi_col_PRECISION
#undef solve_tridi_col_PRECISION_STR
#undef solve_tridi_single_problem_PRECISION
#undef solve_tridi_single_problem_PRECISION_STR
#undef solve_evp_NUMBER_2stage_PRECISION
#undef solve_evp_NUMBER_2stage_PRECISION_STR
#undef solve_evp_complex_2stage_PRECISION
#undef qr_pdgeqrf_2dcomm_PRECISION
#undef qr_pdgeqrf_2dcomm_PRECISION_STR
#undef hh_transform_NUMBER_PRECISION
......@@ -459,12 +377,6 @@
#undef redist_band_NUMBER_PRECISION
#undef redist_band_NUMBER_PRECISION_STR
#undef redist_band_complex_PRECISION
#undef unpack_row_NUMBER_cpu_PRECISION
#undef unpack_row_NUMBER_cpu_PRECISION_STR
#undef unpack_row_complex_cpu_PRECISION
#undef unpack_row_NUMBER_cpu_openmp_PRECISION
#undef unpack_row_NUMBER_cpu_openmp_PRECISION_STR
#undef unpack_row_complex_cpu_openmp_PRECISION
#undef unpack_and_prepare_row_group_NUMBER_gpu_PRECISION
#undef unpack_and_prepare_row_group_NUMBER_gpu_PRECISION_STR
#undef unpack_and_prepare_row_group_complex_gpu_PRECISION
......@@ -582,22 +494,6 @@
#ifdef DOUBLE_PRECISION
#define PRECISION double
#define PRECISION_STR 'double'
#define elpa_transpose_vectors_NUMBER_PRECISION elpa_transpose_vectors_complex_double
#define elpa_transpose_vectors_NUMBER_PRECISION_STR 'elpa_transpose_vectors_complex_double'
#define elpa_transpose_vectors_complex_PRECISION elpa_transpose_vectors_complex_double
#define elpa_reduce_add_vectors_NUMBER_PRECISION elpa_reduce_add_vectors_complex_double
#define elpa_reduce_add_vectors_NUMBER_PRECISION_STR 'elpa_reduce_add_vectors_complex_double'
#define elpa_reduce_add_vectors_complex_PRECISION elpa_reduce_add_vectors_complex_double
#define band_band_NUMBER_PRECISION band_band_complex_double
#define band_band_NUMBER_PRECISION_STR 'band_band_complex_double'
#define band_band_complex_PRECISION band_band_complex_double
#define solve_tridi_col_PRECISION solve_tridi_col_double
#define solve_tridi_col_PRECISION_STR 'solve_tridi_col_double'
#define solve_tridi_single_problem_PRECISION solve_tridi_single_problem_double
#define solve_tridi_single_problem_PRECISION_STR 'solve_tridi_single_problem_double'
#define solve_evp_NUMBER_2stage_PRECISION solve_evp_complex_2stage_double
#define solve_evp_NUMBER_2stage_PRECISION_STR 'solve_evp_complex_2stage_double'
#define solve_evp_complex_2stage_PRECISION solve_evp_complex_2stage_double
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_double
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_double'
#define hh_transform_NUMBER_PRECISION hh_transform_complex_double
......@@ -610,12 +506,6 @@
#define redist_band_NUMBER_PRECISION redist_band_complex_double
#define redist_band_NUMBER_PRECISION_STR 'redist_band_complex_double'
#define redist_band_complex_PRECISION redist_band_complex_double
#define unpack_row_NUMBER_cpu_PRECISION unpack_row_complex_cpu_double
#define unpack_row_NUMBER_cpu_PRECISION_STR 'unpack_row_complex_cpu_double'
#define unpack_row_complex_cpu_PRECISION unpack_row_complex_cpu_double
#define unpack_row_NUMBER_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_double
#define unpack_row_NUMBER_cpu_openmp_PRECISION_STR 'unpack_row_complex_cpu_openmp_double'
#define unpack_row_complex_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_double
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION unpack_and_prepare_row_group_complex_gpu_double
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION_STR 'unpack_and_prepare_row_group_complex_gpu_double'
#define unpack_and_prepare_row_group_complex_gpu_PRECISION unpack_and_prepare_row_group_complex_gpu_double
......@@ -734,22 +624,6 @@
#ifdef SINGLE_PRECISION
#define PRECISION single
#define PRECISION_STR 'single'
#define elpa_transpose_vectors_NUMBER_PRECISION elpa_transpose_vectors_complex_single
#define elpa_transpose_vectors_NUMBER_PRECISION_STR 'elpa_transpose_vectors_complex_single'
#define elpa_transpose_vectors_complex_PRECISION elpa_transpose_vectors_complex_single
#define elpa_reduce_add_vectors_NUMBER_PRECISION elpa_reduce_add_vectors_complex_single
#define elpa_reduce_add_vectors_NUMBER_PRECISION_STR 'elpa_reduce_add_vectors_complex_single'
#define elpa_reduce_add_vectors_complex_PRECISION elpa_reduce_add_vectors_complex_single
#define band_band_NUMBER_PRECISION band_band_complex_single
#define band_band_NUMBER_PRECISION_STR 'band_band_complex_single'
#define band_band_complex_PRECISION band_band_complex_single
#define solve_tridi_col_PRECISION solve_tridi_col_single
#define solve_tridi_col_PRECISION_STR 'solve_tridi_col_single'
#define solve_tridi_single_problem_PRECISION solve_tridi_single_problem_single
#define solve_tridi_single_problem_PRECISION_STR 'solve_tridi_single_problem_single'
#define solve_evp_NUMBER_2stage_PRECISION solve_evp_complex_2stage_single
#define solve_evp_NUMBER_2stage_PRECISION_STR 'solve_evp_complex_2stage_single'
#define solve_evp_complex_2stage_PRECISION solve_evp_complex_2stage_single
#define qr_pdgeqrf_2dcomm_PRECISION qr_pdgeqrf_2dcomm_single
#define qr_pdgeqrf_2dcomm_PRECISION_STR 'qr_pdgeqrf_2dcomm_single'
#define hh_transform_NUMBER_PRECISION hh_transform_complex_single
......@@ -762,12 +636,6 @@
#define redist_band_NUMBER_PRECISION redist_band_complex_single
#define redist_band_NUMBER_PRECISION_STR 'redist_band_complex_single'
#define redist_band_complex_PRECISION redist_band_complex_single
#define unpack_row_NUMBER_cpu_PRECISION unpack_row_complex_cpu_single
#define unpack_row_NUMBER_cpu_PRECISION_STR 'unpack_row_complex_cpu_single'
#define unpack_row_complex_cpu_PRECISION unpack_row_complex_cpu_single
#define unpack_row_NUMBER_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_single
#define unpack_row_NUMBER_cpu_openmp_PRECISION_STR 'unpack_row_complex_cpu_openmp_single'
#define unpack_row_complex_cpu_openmp_PRECISION unpack_row_complex_cpu_openmp_single
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION unpack_and_prepare_row_group_complex_gpu_single
#define unpack_and_prepare_row_group_NUMBER_gpu_PRECISION_STR 'unpack_and_prepare_row_group_complex_gpu_single'
#define unpack_and_prepare_row_group_complex_gpu_PRECISION unpack_and_prepare_row_group_complex_gpu_single
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment