Commit 2ede0ba8 authored by Andreas Marek

Merge branch 'master' of gitlab.mpcdf.mpg.de:elpa/elpa

parents 69695acb 50b3ab16
#!/usr/bin/python
import sys
simple_tokens = [
"elpa_transpose_vectors_NUMBER_PRECISION",
"elpa_reduce_add_vectors_NUMBER_PRECISION",
"bandred_NUMBER_PRECISION",
"trans_ev_band_to_full_NUMBER_PRECISION",
"tridiag_band_NUMBER_PRECISION",
"trans_ev_tridi_to_band_NUMBER_PRECISION",
"band_band_NUMBER_PRECISION",
"tridiag_NUMBER_PRECISION",
"trans_ev_NUMBER_PRECISION",
"solve_tridi_PRECISION",
"solve_tridi_col_PRECISION",
"solve_tridi_single_problem_PRECISION",
"qr_pdgeqrf_2dcomm_PRECISION",
"hh_transform_NUMBER_PRECISION",
"symm_matrix_allreduce_PRECISION",
"redist_band_NUMBER_PRECISION",
"unpack_row_NUMBER_cpu_PRECISION",
"unpack_row_NUMBER_cpu_openmp_PRECISION",
"unpack_and_prepare_row_group_NUMBER_gpu_PRECISION",
"extract_hh_tau_NUMBER_gpu_PRECISION",
"compute_hh_dot_products_NUMBER_gpu_PRECISION",
"compute_hh_trafo_NUMBER_cpu_openmp_PRECISION",
"compute_hh_trafo_NUMBER_cpu_PRECISION",
"pack_row_group_NUMBER_gpu_PRECISION",
"pack_row_NUMBER_cpu_openmp_PRECISION",
"pack_row_NUMBER_cpu_PRECISION",
"wy_gen_PRECISION",
"wy_right_PRECISION",
"wy_left_PRECISION",
"wy_symm_PRECISION",
"merge_recursive_PRECISION",
"merge_systems_PRECISION",
"distribute_global_column_PRECISION",
"check_monotony_PRECISION",
"global_gather_PRECISION",
"resort_ev_PRECISION",
"transform_columns_PRECISION",
"solve_secular_equation_PRECISION",
"global_product_PRECISION",
"add_tmp_PRECISION",
"v_add_s_PRECISION",
]
blas_tokens = [
"PRECISION_GEMV",
"PRECISION_TRMV",
"PRECISION_GEMM",
"PRECISION_TRMM",
"PRECISION_HERK",
"PRECISION_SYRK",
"PRECISION_SYMV",
"PRECISION_SYMM",
"PRECISION_SYR2",
"PRECISION_SYR2K",
"PRECISION_GEQRF",
"PRECISION_STEDC",
"PRECISION_STEQR",
"PRECISION_LAMRG",
"PRECISION_LAMCH",
"PRECISION_LAPY2",
"PRECISION_LAED4",
"PRECISION_LAED5",
"cublas_PRECISION_GEMM",
"cublas_PRECISION_TRMM",
"cublas_PRECISION_GEMV",
]
explicit_tokens_complex = [
("PRECISION_SUFFIX", "\"_double\"", "\"_single\""),
("MPI_COMPLEX_PRECISION", "MPI_DOUBLE_COMPLEX", "MPI_COMPLEX"),
("MPI_REAL_PRECISION", "MPI_REAL8", "MPI_REAL4"),
("KIND_PRECISION", "rk8", "rk4"),
("PRECISION_CMPLX", "DCMPLX", "CMPLX"),
("PRECISION_IMAG", "DIMAG", "AIMAG"),
("PRECISION_REAL", "DREAL", "REAL"),
("CONST_REAL_0_0", "0.0_rk8", "0.0_rk4"),
("CONST_REAL_1_0", "1.0_rk8", "1.0_rk4"),
("CONST_COMPLEX_0_0", "0.0_ck8", "0.0_ck4"),
("size_of_PRECISION_complex", "size_of_double_complex_datatype", "size_of_single_complex_datatype"),
]
explicit_tokens_real = [
("PRECISION_SUFFIX", "\"_double\"", "\"_single\""),
("CONST_0_0", "0.0_rk8", "0.0_rk4"),
("CONST_0_5", "0.5_rk8", "0.5_rk4"),
("CONST_1_0", "1.0_rk8", "1.0_rk4"),
("CONST_2_0", "2.0_rk8", "2.0_rk4"),
("CONST_8_0", "8.0_rk8", "8.0_rk4"),
("size_of_PRECISION_real", "size_of_double_real_datatype", "size_of_single_real_datatype"),
("MPI_REAL_PRECISION", "MPI_REAL8", "MPI_REAL4"),
]
explicit_order = {"single":2, "double":1}
blas_prefixes = {("real","single") : "S", ("real","double") : "D", ("complex","single") : "C", ("complex","double") : "Z"}
def print_variant(number, precision, explicit):
    # Emit one #define per token, substituting the NUMBER (real/complex)
    # and PRECISION (single/double) placeholders.
    for token in simple_tokens:
        print "#define ", token.replace("NUMBER", number), token.replace("PRECISION", precision).replace("NUMBER", number)
    for token in blas_tokens:
        print "#define ", token, token.replace("PRECISION_", blas_prefixes[(number, precision)])
    for token in explicit:
        print "#define ", token[0], token[explicit_order[precision]]

def print_undefs(number, explicit):
    # Clear previous definitions first; the generated header may be
    # preprocessed more than once.
    for token in simple_tokens:
        print "#undef ", token.replace("NUMBER", number)
    for token in blas_tokens:
        print "#undef ", token
    for token in explicit:
        print "#undef ", token[0]

if sys.argv[1] == "complex":
    print "#ifdef DOUBLE_PRECISION_COMPLEX"
    print_undefs("complex", explicit_tokens_complex)
    print_variant("complex", "double", explicit_tokens_complex)
    print "#else"
    print_undefs("complex", explicit_tokens_complex)
    print_variant("complex", "single", explicit_tokens_complex)
    print "#endif"
elif sys.argv[1] == "real":
    print "#ifdef DOUBLE_PRECISION_REAL"
    print_undefs("real", explicit_tokens_real)
    print_variant("real", "double", explicit_tokens_real)
    print "#else"
    print_undefs("real", explicit_tokens_real)
    print_variant("real", "single", explicit_tokens_real)
    print "#endif"
else:
    assert False, "argument must be 'real' or 'complex'"
\ No newline at end of file
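For reference, a condensed sample of what this generator emits for the real case. The script's file name is not shown in this diff, so the invocation below is illustrative:

python generate_precision_macros.py real

#ifdef DOUBLE_PRECISION_REAL
#undef bandred_real_PRECISION
#undef PRECISION_GEMM
#undef CONST_1_0
#define bandred_real_PRECISION bandred_real_double
#define PRECISION_GEMM DGEMM
#define CONST_1_0 1.0_rk8
#else
(the same #undef block, then the single-precision variants)
#define bandred_real_PRECISION bandred_real_single
#define PRECISION_GEMM SGEMM
#define CONST_1_0 1.0_rk4
#endif

Note that NUMBER is substituted on both sides of each #define, while PRECISION survives in the macro name and is substituted only in the replacement text: bandred_NUMBER_PRECISION becomes the macro bandred_real_PRECISION, which expands to bandred_real_double.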
#!/usr/bin/python
simple_tokens = ["tridiag_complex_PRECISION",
"trans_ev_complex_PRECISION",
"solve_complex_PRECISION",
"hh_transform_complex_PRECISION",
"elpa_transpose_vectors_complex_PRECISION",
"elpa_reduce_add_vectors_complex_PRECISION",
]
blas_tokens = ["PRECISION_GEMV",
"PRECISION_TRMV",
"PRECISION_GEMM",
"PRECISION_TRMM",
"PRECISION_HERK",
"cublas_PRECISION_gemm",
"cublas_PRECISION_trmm",
"cublas_PRECISION_gemv",
]
explicit_tokens = [("PRECISION_SUFFIX", "\"_double\"", "\"_single\""),
("MPI_COMPLEX_PRECISION", "MPI_DOUBLE_COMPLEX", "MPI_COMPLEX"),
("MPI_REAL_PRECISION", "MPI_REAL8", "MPI_REAL4"),
("KIND_PRECISION", "rk8", "rk4"),
("PRECISION_CMPLX", "DCMPLX", "CMPLX"),
("PRECISION_IMAG", "DIMAG", "AIMAG"),
("PRECISION_REAL", "DREAL", "REAL"),
("CONST_REAL_0_0", "0.0_rk8", "0.0_rk4"),
("CONST_REAL_1_0", "1.0_rk8", "1.0_rk4"),
("size_of_PRECISION_complex", "size_of_double_complex_datatype", "size_of_single_complex_datatype"),
]
print "#ifdef DOUBLE_PRECISION_COMPLEX"
for token in simple_tokens:
print "#define ", token, token.replace("PRECISION", "double")
for token in blas_tokens:
print "#define ", token, token.replace("PRECISION_", "Z")
for token in explicit_tokens:
print "#define ", token[0], token[1]
print "#else"
for token in simple_tokens:
print "#undef ", token
for token in blas_tokens:
print "#undef ", token
for token in explicit_tokens:
print "#undef ", token[0]
for token in simple_tokens:
print "#define ", token, token.replace("PRECISION", "single")
for token in blas_tokens:
print "#define ", token, token.replace("PRECISION_", "C")
for token in explicit_tokens:
print "#define ", token[0], token[2]
print "#endif"
@@ -52,7 +52,7 @@
! distributed along with the original code in the file "COPYING".
#endif
-subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+subroutine solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, wantDebug, success )
#ifdef HAVE_DETAILED_TIMINGS
@@ -81,7 +81,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("solve_tridi" // M_PRECISION_SUFFIX)
call timer%start("solve_tridi" // PRECISION_SUFFIX)
call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
@@ -96,7 +96,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q
! Set Q to 0
-q(1:l_rows, 1:l_cols) = M_CONST_0_0
+q(1:l_rows, 1:l_cols) = CONST_0_0
! Get the limits of the subdivisions, each subdivision has as many cols
! as fit on the respective processor column
@@ -116,7 +116,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
! Scalapack supports it but delivers no results for these columns,
! which is rather annoying
if (nc==0) then
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
if (wantDebug) write(error_unit,*) 'ELPA1_solve_tridi: ERROR: Problem contains processor column with zero width'
success = .false.
return
@@ -141,10 +141,10 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
else
nev1 = MIN(nev,l_cols)
endif
-call M_solve_tridi_col_PRECISION(l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
+call solve_tridi_col_PRECISION(l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
matrixCols, mpi_comm_rows, wantDebug, success)
if (.not.(success)) then
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
endif
! If there is only 1 processor column, we are done
@@ -156,7 +156,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop
endif
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
endif
@@ -215,9 +215,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
enddo
! Recursively merge sub problems
-call M_merge_recursive_PRECISION(0, np_cols, wantDebug, success)
+call merge_recursive_PRECISION(0, np_cols, wantDebug, success)
if (.not.(success)) then
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
endif
@@ -227,11 +227,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop
endif
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
contains
-recursive subroutine M_merge_recursive_PRECISION(np_off, nprocs, wantDebug, success)
+recursive subroutine merge_recursive_PRECISION(np_off, nprocs, wantDebug, success)
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
@@ -264,9 +264,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
np1 = nprocs/2
np2 = nprocs-np1
-if (np1 > 1) call M_merge_recursive_PRECISION(np_off, np1, wantDebug, success)
+if (np1 > 1) call merge_recursive_PRECISION(np_off, np1, wantDebug, success)
if (.not.(success)) return
-if (np2 > 1) call M_merge_recursive_PRECISION(np_off+np1, np2, wantDebug, success)
+if (np2 > 1) call merge_recursive_PRECISION(np_off+np1, np2, wantDebug, success)
if (.not.(success)) return
noff = limits(np_off)
@@ -277,7 +277,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
call timer%start("mpi_communication")
if (my_pcol==np_off) then
do n=np_off+np1,np_off+nprocs-1
-call mpi_send(d(noff+1), nmid, M_MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
+call mpi_send(d(noff+1), nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
enddo
endif
call timer%stop("mpi_communication")
@@ -286,7 +286,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then
#ifdef WITH_MPI
call timer%start("mpi_communication")
-call mpi_recv(d(noff+1), nmid, M_MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
+call mpi_recv(d(noff+1), nmid, MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
call timer%stop("mpi_communication")
#else /* WITH_MPI */
! d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1)
@@ -297,7 +297,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
do n=np_off,np_off+np1-1
#ifdef WITH_MPI
call timer%start("mpi_communication")
-call mpi_send(d(noff+nmid+1), nlen-nmid, M_MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
+call mpi_send(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
call timer%stop("mpi_communication")
#endif /* WITH_MPI */
@@ -306,7 +306,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
if (my_pcol>=np_off .and. my_pcol<np_off+np1) then
#ifdef WITH_MPI
call timer%start("mpi_communication")
-call mpi_recv(d(noff+nmid+1), nlen-nmid, M_MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
+call mpi_recv(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
call timer%stop("mpi_communication")
#else /* WITH_MPI */
! d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1)
@@ -316,22 +316,22 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
! Last merge, result distribution must be block cyclic, noff==0,
! p_col_bc is set so that only nev eigenvalues are calculated
-call M_merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
+call merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col, p_col, &
l_col_bc, p_col_bc, np_off, nprocs, wantDebug, success )
if (.not.(success)) return
else
! Not last merge, leave dense column distribution
-call M_merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
+call merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col(noff+1), p_col(noff+1), &
l_col(noff+1), p_col(noff+1), np_off, nprocs, wantDebug, success )
if (.not.(success)) return
endif
-end subroutine M_merge_recursive_PRECISION
+end subroutine merge_recursive_PRECISION
-end subroutine M_solve_tridi_PRECISION
+end subroutine solve_tridi_PRECISION
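The recursion in merge_recursive_PRECISION above halves the set of processor columns, solves each half, and then merges the two halves with merge_systems_PRECISION. A minimal Python sketch of that call pattern (names are illustrative, not ELPA code):

def merge_recursive(np_off, nprocs):
    # Mirror of the halving logic: left half, right half, then one merge.
    if nprocs <= 1:
        return
    np1 = nprocs // 2
    merge_recursive(np_off, np1)
    merge_recursive(np_off + np1, nprocs - np1)
    print("merge columns %d..%d" % (np_off, np_off + nprocs - 1))

merge_recursive(0, 4)
# merge columns 0..1
# merge columns 2..3
# merge columns 0..3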
-subroutine M_solve_tridi_col_PRECISION( na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
+subroutine solve_tridi_col_PRECISION( na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
! Solves the symmetric, tridiagonal eigenvalue problem on one processor column
! with the divide and conquer method.
@@ -365,7 +365,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("solve_tridi_col" // M_PRECISION_SUFFIX)
call timer%start("solve_tridi_col" // PRECISION_SUFFIX)
call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
@@ -427,7 +427,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
noff = limits(n) ! Start of subproblem
nlen = limits(n+1)-noff ! Size of subproblem
-call M_solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1), &
+call solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1), &
q(nqoff+noff+1,noff+1),ubound(q,dim=1), wantDebug, success)
if (.not.(success)) return
@@ -456,7 +456,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
noff = limits(my_prow) ! Start of subproblem
nlen = limits(my_prow+1)-noff ! Size of subproblem
-call M_solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1),qmat1, &
+call solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1),qmat1, &
ubound(qmat1,dim=1), wantDebug, success)
if (.not.(success)) return
@@ -470,9 +470,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
nlen = limits(np+1)-noff
#ifdef WITH_MPI
call timer%start("mpi_communication")
-call MPI_Bcast(d(noff+1), nlen, M_MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
+call MPI_Bcast(d(noff+1), nlen, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
qmat2 = qmat1
-call MPI_Bcast(qmat2, max_size*max_size, M_MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
+call MPI_Bcast(qmat2, max_size*max_size, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
call timer%stop("mpi_communication")
#else /* WITH_MPI */
! qmat2 = qmat1 ! is this correct
@@ -480,9 +480,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
do i=1,nlen
#ifdef WITH_MPI
-call M_distribute_global_column_PRECISION(qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
+call distribute_global_column_PRECISION(qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
#else /* WITH_MPI */
-call M_distribute_global_column_PRECISION(qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
+call distribute_global_column_PRECISION(qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
#endif /* WITH_MPI */
enddo
@@ -525,7 +525,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
! Last merge, set p_col_o=-1 for unneeded (output) eigenvectors
p_col_o(nev+1:na) = -1
endif
-call M_merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
+call merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_self, l_col(noff+1), p_col_i(noff+1), &
l_col(noff+1), p_col_o(noff+1), 0, 1, wantDebug, success)
if (.not.(success)) return
@@ -542,11 +542,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop
endif
call timer%stop("solve_tridi_col" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi_col" // PRECISION_SUFFIX)
-end subroutine M_solve_tridi_col_PRECISION
+end subroutine solve_tridi_col_PRECISION
-recursive subroutine M_solve_tridi_single_problem_PRECISION(nlen, d, e, q, ldq, wantDebug, success)
+recursive subroutine solve_tridi_single_problem_PRECISION(nlen, d, e, q, ldq, wantDebug, success)
! Solves the symmetric, tridiagonal eigenvalue problem on a single processor.
! Takes precautions if DSTEDC fails or if the eigenvalues are not ordered correctly.
@@ -572,7 +572,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("solve_tridi_single" // M_PRECISION_SUFFIX)
call timer%start("solve_tridi_single" // PRECISION_SUFFIX)
success = .true.
allocate(ds(nlen), es(nlen), stat=istat, errmsg=errorMessage)
@@ -596,7 +596,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop
endif
-call M_PRECISION_STEDC('I', nlen, d, e, q, ldq, work, lwork, iwork, liwork, info)
+call PRECISION_STEDC('I', nlen, d, e, q, ldq, work, lwork, iwork, liwork, info)
if (info /= 0) then
@@ -606,7 +606,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
d(:) = ds(:)
e(:) = es(:)
-call M_PRECISION_STEQR('I', nlen, d, e, q, ldq, work, info)
+call PRECISION_STEQR('I', nlen, d, e, q, ldq, work, info)
! If DSTEQR fails also, we don't know what to do further ...
if (info /= 0) then
@@ -666,7 +666,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
endif
enddo
call timer%stop("solve_tridi_single" // M_PRECISION_SUFFIX)
call timer%stop("solve_tridi_single" // PRECISION_SUFFIX)
-end subroutine M_solve_tridi_single_problem_PRECISION
+end subroutine solve_tridi_single_problem_PRECISION
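The driver fallback in solve_tridi_single_problem_PRECISION (save d and e, try the fast divide-and-conquer routine, restore and retry with the plain QR routine on failure) can be sketched in Python, with SciPy standing in for the LAPACK calls; eigh_tridiagonal and its lapack_driver options serve here only as an analogy for DSTEDC/DSTEQR:

import numpy as np
from scipy.linalg import eigh_tridiagonal

def solve_tridi_single(d, e):
    ds, es = d.copy(), e.copy()  # backup, like the ds/es arrays above
    try:
        return eigh_tridiagonal(d, e, lapack_driver='stemr')
    except np.linalg.LinAlgError:
        # Fast driver failed: restore the saved tridiagonal and retry
        # with a slower, more robust driver.
        return eigh_tridiagonal(ds, es, lapack_driver='stev')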
@@ -54,16 +54,16 @@
#if REALCASE == 1
-subroutine M_v_add_s_PRECISION(v,n,s)
+subroutine v_add_s_PRECISION(v,n,s)
use precision
implicit none
integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: v(n),s
v(:) = v(:) + s
-end subroutine M_v_add_s_PRECISION
+end subroutine v_add_s_PRECISION
-subroutine M_distribute_global_column_PRECISION(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
+subroutine distribute_global_column_PRECISION(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
implicit none
@@ -88,9 +88,9 @@
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo
-end subroutine M_distribute_global_column_PRECISION
+end subroutine distribute_global_column_PRECISION
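distribute_global_column_PRECISION copies the piece of a global column that the calling process row owns under ELPA's 1-D block-cyclic layout. A sketch of the standard block-cyclic index map it relies on (0-based here; ELPA's actual indexing is 1-based and offset by noff):

def owner_and_local_index(g, nblk, np_rows):
    # Global row g lives on process row (g // nblk) % np_rows,
    # at local position (g // (nblk * np_rows)) * nblk + g % nblk.
    prow = (g // nblk) % np_rows
    lidx = (g // (nblk * np_rows)) * nblk + g % nblk
    return prow, lidx

# nblk=2, np_rows=2: global rows 0..7 land on process rows
# 0,0,1,1,0,0,1,1 with local indices 0,1,0,1,2,3,2,3.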
-subroutine M_solve_secular_equation_PRECISION(n, i, d, z, delta, rho, dlam)
+subroutine solve_secular_equation_PRECISION(n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
@@ -159,7 +159,7 @@
! Upper and lower bound of the shifted solution interval are a and b
call timer%start("solve_secular_equation" // M_PRECISION_SUFFIX)
call timer%start("solve_secular_equation" // PRECISION_SUFFIX)
if (i==n) then
! Special case: Last eigenvalue
@@ -169,15 +169,15 @@
dshift = d(n)
delta(:) = d(:) - dshift
-a = M_CONST_0_0 ! delta(n)
-b = rho*SUM(z(:)**2) + M_CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess
+a = CONST_0_0 ! delta(n)
+b = rho*SUM(z(:)**2) + CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess
else
! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
! We check the sign of the function in the midpoint of the interval
! in order to determine if the eigenvalue is closer to d(i) or d(i+1)
-x = M_CONST_0_5*(d(i)+d(i+1))
-y = M_CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x))
+x = CONST_0_5*(d(i)+d(i+1))
+y = CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x))
if (y>0) then
! solution is next to d(i)
dshift = d(i)
@@ -197,7 +197,7 @@
do iter=1,200
! Interval subdivision
-x = M_CONST_0_5*(a+b)
+x = CONST_0_5*(a+b)
if (x==a .or. x==b) exit ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL
if (abs(x) < 1.e-200_rk8) exit ! x next to pole
@@ -223,14 +223,14 @@
dlam = x + dshift
delta(:) = delta(:) - x
call timer%stop("solve_secular_equation" // M_PRECISION_SUFFIX)
call timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
-end subroutine M_solve_secular_equation_PRECISION
+end subroutine solve_secular_equation_PRECISION
!-------------------------------------------------------------------------------
#endif
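The bisection loop above solves the secular equation f(x) = 1 + rho * SUM(z(:)**2 / (d(:) - x)) = 0 on a shifted interval whose endpoints a and b bracket one root. A minimal NumPy sketch of the same idea, without the shift bookkeeping, assuming rho > 0 so that f is increasing between two adjacent poles:

import numpy as np

def solve_secular(d, z, rho, a, b):
    f = lambda x: 1.0 + rho * np.sum(z**2 / (d - x))
    for _ in range(200):  # same iteration cap as the Fortran loop
        x = 0.5 * (a + b)
        if x == a or x == b:
            break  # no further subdivision possible
        if f(x) > 0:
            b = x  # root lies to the left
        else:
            a = x
    return x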
#if REALCASE == 1
-subroutine M_hh_transform_real_PRECISION(alpha, xnorm_sq, xf, tau)
+subroutine hh_transform_real_PRECISION(alpha, xnorm_sq, xf, tau)
! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
#endif
#if COMPLEXCASE == 1
@@ -266,7 +266,7 @@
real(kind=REAL_DATATYPE) :: BETA
#if REALCASE == 1
call timer%start("hh_transform_real" // M_PRECISION_SUFFIX )
call timer%start("hh_transform_real" // PRECISION_SUFFIX )
#endif
#if COMPLEXCASE == 1
call timer%start("hh_transform_complex" // PRECISION_SUFFIX )
@@ -332,14 +332,14 @@
endif
#if REALCASE == 1
call timer%stop("hh_transform_real" // M_PRECISION_SUFFIX )
call timer%stop("hh_transform_real" // PRECISION_SUFFIX )
#endif
#if COMPLEXCASE == 1
call timer%stop("hh_transform_complex" // PRECISION_SUFFIX )
#endif
#if REALCASE == 1
-end subroutine M_hh_transform_real_PRECISION
+end subroutine hh_transform_real_PRECISION
#endif
#if COMPLEXCASE == 1
end subroutine hh_transform_complex_PRECISION
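hh_transform computes a Householder reflector from the leading element alpha and the squared norm of the trailing part, rather than from the full vector (the comment above likens it to DLARFP). One standard formulation, in the LAPACK DLARFG convention (a sketch; ELPA's sign handling and the complex case may differ):

import math

def hh_transform_real(alpha, xnorm_sq):
    # Returns (beta, tau, scale): H = I - tau*v*v^T with v = [1, x*scale]
    # maps [alpha, x] to [beta, 0, ..., 0].
    if xnorm_sq == 0.0:
        return alpha, 0.0, 1.0  # nothing to annihilate
    beta = -math.copysign(math.sqrt(alpha * alpha + xnorm_sq), alpha)
    tau = (beta - alpha) / beta
    scale = 1.0 / (alpha - beta)  # factor applied to the trailing part x
    return beta, tau, scale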
@@ -87,7 +87,7 @@
!>
#if REALCASE == 1
-subroutine M_trans_ev_real_PRECISION (na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
+subroutine trans_ev_real_PRECISION (na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
#endif
#if COMPLEXCASE == 1
subroutine trans_ev_complex_PRECISION(na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
@@ -156,7 +156,7 @@
logical :: successCUDA
#if REALCASE == 1
call timer%start("trans_ev_real" // M_PRECISION_SUFFIX)
call timer%start("trans_ev_real" // PRECISION_SUFFIX)
#endif
#if COMPLEXCASE == 1
call timer%start("trans_ev_complex" // PRECISION_SUFFIX)
@@ -256,7 +256,7 @@
#endif
#if REALCASE == 1
-successCUDA = cuda_malloc(tmat_dev, max_stored_rows * max_stored_rows * M_size_of_PRECISION_real)
+successCUDA = cuda_malloc(tmat_dev, max_stored_rows * max_stored_rows * size_of_PRECISION_real)
check_alloc_cuda("trans_ev", successCUDA)
#endif
#if COMPLEXCASE == 1
@@ -265,7 +265,7 @@
#endif
#if REALCASE == 1
-successCUDA = cuda_malloc(hvm_dev, max_local_rows * max_stored_rows * M_size_of_PRECISION_real)
+successCUDA = cuda_malloc(hvm_dev, max_local_rows * max_stored_rows * size_of_PRECISION_real)
check_alloc_cuda("trans_ev", successCUDA)
#endif
#if COMPLEXCASE == 1
@@ -274,7 +274,7 @@
#endif
#if REALCASE == 1
-successCUDA = cuda_malloc(tmp_dev, max_local_cols * max_stored_rows * M_size_of_PRECISION_real)
+successCUDA = cuda_malloc(tmp_dev, max_local_cols * max_stored_rows * size_of_PRECISION_real)
check_alloc_cuda("trans_ev", successCUDA)
#endif
#if COMPLEXCASE == 1
@@ -283,7 +283,7 @@
#endif
#if REALCASE == 1
-successCUDA = cuda_malloc(q_dev, ldq * matrixCols * M_size_of_PRECISION_real)
+successCUDA = cuda_malloc(q_dev, ldq * matrixCols * size_of_PRECISION_real)
check_alloc_cuda("trans_ev", successCUDA)
#endif
#if COMPLEXCASE == 1
@@ -293,7 +293,7 @@
! q_dev = q_mat
#if REALCASE == 1
-successCUDA = cuda_memcpy(q_dev, loc(q_mat(1,1)), ldq * matrixCols * M_size_of_PRECISION_real, cudaMemcpyHostToDevice)
+successCUDA = cuda_memcpy(q_dev, loc(q_mat(1,1)), ldq * matrixCols * size_of_PRECISION_real, cudaMemcpyHostToDevice)
check_memcpy_cuda("trans_ev", successCUDA)
#endif
#if COMPLEXCASE == 1
@@ -331,7 +331,7 @@
call timer%start("mpi_communication")
if (nb>0) &
#if REALCASE == 1
-call MPI_Bcast(hvb, nb, M_MPI_REAL_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
+call MPI_Bcast(hvb, nb, MPI_REAL_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
#endif
#if COMPLEXCASE == 1
call MPI_Bcast(hvb, nb, MPI_COMPLEX_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
@@ -359,9 +359,9 @@
tmat = 0
if (l_rows>0) &
#if REALCASE == 1
-call M_PRECISION_SYRK('U', 'T', nstor, l_rows, &
-M_CONST_1_0, hvm,