Commit ab0dfe98 authored by Pavel Kus's avatar Pavel Kus
Browse files

real precision header generated. Removed M_

parent 5cf0ae52
#!/usr/bin/python
import sys
# Subroutine-name tokens: when the header is generated, NUMBER is replaced
# by "real"/"complex" and PRECISION by "single"/"double".
simple_tokens = [
    "elpa_transpose_vectors_NUMBER_PRECISION",
    "elpa_reduce_add_vectors_NUMBER_PRECISION",
    "bandred_NUMBER_PRECISION",
    "trans_ev_band_to_full_NUMBER_PRECISION",
    "tridiag_band_NUMBER_PRECISION",
    "trans_ev_tridi_to_band_NUMBER_PRECISION",
    "band_band_NUMBER_PRECISION",
    "tridiag_NUMBER_PRECISION",
    "trans_ev_NUMBER_PRECISION",
    "solve_tridi_PRECISION",
    "solve_tridi_col_PRECISION",
    "solve_tridi_single_problem_PRECISION",
    "qr_pdgeqrf_2dcomm_PRECISION",
    "hh_transform_NUMBER_PRECISION",
    "symm_matrix_allreduce_PRECISION",
    "redist_band_NUMBER_PRECISION",
    "unpack_row_NUMBER_cpu_PRECISION",
    "unpack_row_NUMBER_cpu_openmp_PRECISION",
    "unpack_and_prepare_row_group_NUMBER_gpu_PRECISION",
    "extract_hh_tau_NUMBER_gpu_PRECISION",
    "compute_hh_dot_products_NUMBER_gpu_PRECISION",
    "compute_hh_trafo_NUMBER_cpu_openmp_PRECISION",
    "compute_hh_trafo_NUMBER_cpu_PRECISION",
    "pack_row_group_NUMBER_gpu_PRECISION",
    "pack_row_NUMBER_cpu_openmp_PRECISION",
    "pack_row_NUMBER_cpu_PRECISION",
    "wy_gen_PRECISION",
    "wy_right_PRECISION",
    "wy_left_PRECISION",
    "wy_symm_PRECISION",
    "merge_recursive_PRECISION",
    "merge_systems_PRECISION",
    "distribute_global_column_PRECISION",
    "check_monotony_PRECISION",
    "global_gather_PRECISION",
    "resort_ev_PRECISION",
    "transform_columns_PRECISION",
    "solve_secular_equation_PRECISION",
    "global_product_PRECISION",
    "add_tmp_PRECISION",
    "v_add_s_PRECISION",
]
# BLAS/LAPACK/cuBLAS call tokens: the "PRECISION_" prefix is swapped for
# the single-letter type prefix (S/D/C/Z) of the generated variant.
blas_tokens = [
    "PRECISION_GEMV",
    "PRECISION_TRMV",
    "PRECISION_GEMM",
    "PRECISION_TRMM",
    "PRECISION_HERK",
    "PRECISION_SYRK",
    "PRECISION_SYMV",
    "PRECISION_SYMM",
    "PRECISION_SYR2",
    "PRECISION_SYR2K",
    "PRECISION_GEQRF",
    "PRECISION_STEDC",
    "PRECISION_STEQR",
    "PRECISION_LAMRG",
    "PRECISION_LAMCH",
    "PRECISION_LAPY2",
    "PRECISION_LAED4",
    "PRECISION_LAED5",
    "cublas_PRECISION_gemm",
    "cublas_PRECISION_trmm",
    "cublas_PRECISION_gemv",
]
# Explicit replacements for the complex case:
# (token, value-for-double, value-for-single).
explicit_tokens_complex = [
    ("PRECISION_SUFFIX",          '"_double"',                      '"_single"'),
    ("MPI_COMPLEX_PRECISION",     "MPI_DOUBLE_COMPLEX",             "MPI_COMPLEX"),
    ("MPI_REAL_PRECISION",        "MPI_REAL8",                      "MPI_REAL4"),
    ("KIND_PRECISION",            "rk8",                            "rk4"),
    ("PRECISION_CMPLX",           "DCMPLX",                         "CMPLX"),
    ("PRECISION_IMAG",            "DIMAG",                          "AIMAG"),
    ("PRECISION_REAL",            "DREAL",                          "REAL"),
    ("CONST_REAL_0_0",            "0.0_rk8",                        "0.0_rk4"),
    ("CONST_REAL_1_0",            "1.0_rk8",                        "1.0_rk4"),
    ("size_of_PRECISION_complex", "size_of_double_complex_datatype", "size_of_single_complex_datatype"),
]
# Explicit replacements for the real case:
# (token, value-for-double, value-for-single).
explicit_tokens_real = [
    ("PRECISION_SUFFIX",       '"_double"',                    '"_single"'),
    ("CONST_0_0",              "0.0_rk8",                      "0.0_rk4"),
    ("CONST_0_5",              "0.5_rk8",                      "0.5_rk4"),
    ("CONST_1_0",              "1.0_rk8",                      "1.0_rk4"),
    ("CONST_2_0",              "2.0_rk8",                      "2.0_rk4"),
    ("CONST_8_0",              "8.0_rk8",                      "8.0_rk4"),
    ("size_of_PRECISION_real", "size_of_double_real_datatype", "size_of_single_real_datatype"),
    ("MPI_REAL_PRECISION",     "MPI_REAL8",                    "MPI_REAL4"),
]
# Index into the explicit-token tuples: element 1 is the double-precision
# value, element 2 the single-precision one.
explicit_order = {"double": 1, "single": 2}
# BLAS type-prefix letter for each (number kind, precision) combination.
blas_prefixes = {
    ("real", "single"):    "S",
    ("real", "double"):    "D",
    ("complex", "single"): "C",
    ("complex", "double"): "Z",
}
def print_variant(number, precision, explicit):
    """Emit the #define lines for one (number, precision) variant.

    number    -- "real" or "complex"
    precision -- "single" or "double"
    explicit  -- list of (token, double_value, single_value) tuples
    """
    # Single-argument parenthesized print produces byte-identical output
    # under both Python 2 and Python 3 (the original py2 print statement
    # inserted one space between items, hence the double space here).
    for token in simple_tokens:
        print("#define  %s %s" % (token.replace("NUMBER", number),
                                  token.replace("PRECISION", precision).replace("NUMBER", number)))
    for token in blas_tokens:
        print("#define  %s %s" % (token,
                                  token.replace("PRECISION_", blas_prefixes[(number, precision)])))
    for token in explicit:
        # token[1] holds the double value, token[2] the single value.
        print("#define  %s %s" % (token[0], token[explicit_order[precision]]))
def print_undefs(number, explicit):
    """Emit the #undef lines clearing every token of one number kind.

    number   -- "real" or "complex"
    explicit -- list of (token, double_value, single_value) tuples
    """
    # Single-argument parenthesized print keeps the output identical under
    # both Python 2 and Python 3 (double space matches the py2 statement).
    for token in simple_tokens:
        print("#undef  %s" % token.replace("NUMBER", number))
    for token in blas_tokens:
        print("#undef  %s" % token)
    for token in explicit:
        print("#undef  %s" % token[0])
# Entry point: generate the precision-macro header for the kind requested
# on the command line ("real" or "complex").  The double-precision macros
# go in the #ifdef branch, the single-precision ones in the #else branch.
kind = sys.argv[1]
if kind == "complex":
    explicit = explicit_tokens_complex
    guard = "DOUBLE_PRECISION_COMPLEX"
elif kind == "real":
    explicit = explicit_tokens_real
    guard = "DOUBLE_PRECISION_REAL"
else:
    # The original used assert(False), which is stripped under -O and gives
    # no diagnostic; exit with a usage message instead.
    sys.exit("usage: %s real|complex" % sys.argv[0])
print("#ifdef %s" % guard)
print_undefs(kind, explicit)
print_variant(kind, "double", explicit)
print("#else")
print_undefs(kind, explicit)
print_variant(kind, "single", explicit)
print("#endif")
\ No newline at end of file
#!/usr/bin/python
# Complex-case subroutine tokens: PRECISION is replaced by
# "single"/"double" when the header is generated.
simple_tokens = [
    "tridiag_complex_PRECISION",
    "trans_ev_complex_PRECISION",
    "solve_complex_PRECISION",
    "hh_transform_complex_PRECISION",
    "elpa_transpose_vectors_complex_PRECISION",
    "elpa_reduce_add_vectors_complex_PRECISION",
]
# BLAS/cuBLAS tokens: the "PRECISION_" prefix becomes "Z" (double complex)
# or "C" (single complex) in the generated header.
blas_tokens = [
    "PRECISION_GEMV",
    "PRECISION_TRMV",
    "PRECISION_GEMM",
    "PRECISION_TRMM",
    "PRECISION_HERK",
    "cublas_PRECISION_gemm",
    "cublas_PRECISION_trmm",
    "cublas_PRECISION_gemv",
]
# Explicit replacements: (token, value-for-double, value-for-single).
explicit_tokens = [
    ("PRECISION_SUFFIX",          '"_double"',                      '"_single"'),
    ("MPI_COMPLEX_PRECISION",     "MPI_DOUBLE_COMPLEX",             "MPI_COMPLEX"),
    ("MPI_REAL_PRECISION",        "MPI_REAL8",                      "MPI_REAL4"),
    ("KIND_PRECISION",            "rk8",                            "rk4"),
    ("PRECISION_CMPLX",           "DCMPLX",                         "CMPLX"),
    ("PRECISION_IMAG",            "DIMAG",                          "AIMAG"),
    ("PRECISION_REAL",            "DREAL",                          "REAL"),
    ("CONST_REAL_0_0",            "0.0_rk8",                        "0.0_rk4"),
    ("CONST_REAL_1_0",            "1.0_rk8",                        "1.0_rk4"),
    ("size_of_PRECISION_complex", "size_of_double_complex_datatype", "size_of_single_complex_datatype"),
]
# Emit the complex-precision header: double-precision macros in the #ifdef
# branch, single-precision ones (after clearing the old definitions) in the
# #else branch.  Single-argument parenthesized print produces byte-identical
# output under both Python 2 and Python 3 (the double space matches the
# original py2 print statement's item separator).
print("#ifdef DOUBLE_PRECISION_COMPLEX")
for token in simple_tokens:
    print("#define  %s %s" % (token, token.replace("PRECISION", "double")))
for token in blas_tokens:
    print("#define  %s %s" % (token, token.replace("PRECISION_", "Z")))
for token in explicit_tokens:
    # token[1] holds the double-precision value.
    print("#define  %s %s" % (token[0], token[1]))
print("#else")
for token in simple_tokens:
    print("#undef  %s" % token)
for token in blas_tokens:
    print("#undef  %s" % token)
for token in explicit_tokens:
    print("#undef  %s" % token[0])
for token in simple_tokens:
    print("#define  %s %s" % (token, token.replace("PRECISION", "single")))
for token in blas_tokens:
    print("#define  %s %s" % (token, token.replace("PRECISION_", "C")))
for token in explicit_tokens:
    # token[2] holds the single-precision value.
    print("#define  %s %s" % (token[0], token[2]))
print("#endif")
This diff is collapsed.
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
! distributed along with the original code in the file "COPYING". ! distributed along with the original code in the file "COPYING".
#endif #endif
subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, & subroutine solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, wantDebug, success ) mpi_comm_cols, wantDebug, success )
#ifdef HAVE_DETAILED_TIMINGS #ifdef HAVE_DETAILED_TIMINGS
...@@ -81,7 +81,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -81,7 +81,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
integer(kind=ik) :: istat integer(kind=ik) :: istat
character(200) :: errorMessage character(200) :: errorMessage
call timer%start("solve_tridi" // M_PRECISION_SUFFIX) call timer%start("solve_tridi" // PRECISION_SUFFIX)
call timer%start("mpi_communication") call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr) call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
...@@ -96,7 +96,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -96,7 +96,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q
! Set Q to 0 ! Set Q to 0
q(1:l_rows, 1:l_cols) = M_CONST_0_0 q(1:l_rows, 1:l_cols) = CONST_0_0
! Get the limits of the subdivisons, each subdivison has as many cols ! Get the limits of the subdivisons, each subdivison has as many cols
! as fit on the respective processor column ! as fit on the respective processor column
...@@ -116,7 +116,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -116,7 +116,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
! Scalapack supports it but delivers no results for these columns, ! Scalapack supports it but delivers no results for these columns,
! which is rather annoying ! which is rather annoying
if (nc==0) then if (nc==0) then
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi" // PRECISION_SUFFIX)
if (wantDebug) write(error_unit,*) 'ELPA1_solve_tridi: ERROR: Problem contains processor column with zero width' if (wantDebug) write(error_unit,*) 'ELPA1_solve_tridi: ERROR: Problem contains processor column with zero width'
success = .false. success = .false.
return return
...@@ -141,10 +141,10 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -141,10 +141,10 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
else else
nev1 = MIN(nev,l_cols) nev1 = MIN(nev,l_cols)
endif endif
call M_solve_tridi_col_PRECISION(l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, & call solve_tridi_col_PRECISION(l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
matrixCols, mpi_comm_rows, wantDebug, success) matrixCols, mpi_comm_rows, wantDebug, success)
if (.not.(success)) then if (.not.(success)) then
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return return
endif endif
! If there is only 1 processor column, we are done ! If there is only 1 processor column, we are done
...@@ -156,7 +156,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -156,7 +156,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop stop
endif endif
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return return
endif endif
...@@ -215,9 +215,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -215,9 +215,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
enddo enddo
! Recursively merge sub problems ! Recursively merge sub problems
call M_merge_recursive_PRECISION(0, np_cols, wantDebug, success) call merge_recursive_PRECISION(0, np_cols, wantDebug, success)
if (.not.(success)) then if (.not.(success)) then
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return return
endif endif
...@@ -227,11 +227,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -227,11 +227,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop stop
endif endif
call timer%stop("solve_tridi" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi" // PRECISION_SUFFIX)
return return
contains contains
recursive subroutine M_merge_recursive_PRECISION(np_off, nprocs, wantDebug, success) recursive subroutine merge_recursive_PRECISION(np_off, nprocs, wantDebug, success)
use precision use precision
#ifdef HAVE_DETAILED_TIMINGS #ifdef HAVE_DETAILED_TIMINGS
use timings use timings
...@@ -264,9 +264,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -264,9 +264,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
np1 = nprocs/2 np1 = nprocs/2
np2 = nprocs-np1 np2 = nprocs-np1
if (np1 > 1) call M_merge_recursive_PRECISION(np_off, np1, wantDebug, success) if (np1 > 1) call merge_recursive_PRECISION(np_off, np1, wantDebug, success)
if (.not.(success)) return if (.not.(success)) return
if (np2 > 1) call M_merge_recursive_PRECISION(np_off+np1, np2, wantDebug, success) if (np2 > 1) call merge_recursive_PRECISION(np_off+np1, np2, wantDebug, success)
if (.not.(success)) return if (.not.(success)) return
noff = limits(np_off) noff = limits(np_off)
...@@ -277,7 +277,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -277,7 +277,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
call timer%start("mpi_communication") call timer%start("mpi_communication")
if (my_pcol==np_off) then if (my_pcol==np_off) then
do n=np_off+np1,np_off+nprocs-1 do n=np_off+np1,np_off+nprocs-1
call mpi_send(d(noff+1), nmid, M_MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr) call mpi_send(d(noff+1), nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
enddo enddo
endif endif
call timer%stop("mpi_communication") call timer%stop("mpi_communication")
...@@ -286,7 +286,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -286,7 +286,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then
#ifdef WITH_MPI #ifdef WITH_MPI
call timer%start("mpi_communication") call timer%start("mpi_communication")
call mpi_recv(d(noff+1), nmid, M_MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr) call mpi_recv(d(noff+1), nmid, MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
call timer%stop("mpi_communication") call timer%stop("mpi_communication")
#else /* WITH_MPI */ #else /* WITH_MPI */
! d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1) ! d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1)
...@@ -297,7 +297,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -297,7 +297,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
do n=np_off,np_off+np1-1 do n=np_off,np_off+np1-1
#ifdef WITH_MPI #ifdef WITH_MPI
call timer%start("mpi_communication") call timer%start("mpi_communication")
call mpi_send(d(noff+nmid+1), nlen-nmid, M_MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr) call mpi_send(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
call timer%stop("mpi_communication") call timer%stop("mpi_communication")
#endif /* WITH_MPI */ #endif /* WITH_MPI */
...@@ -306,7 +306,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -306,7 +306,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
if (my_pcol>=np_off .and. my_pcol<np_off+np1) then if (my_pcol>=np_off .and. my_pcol<np_off+np1) then
#ifdef WITH_MPI #ifdef WITH_MPI
call timer%start("mpi_communication") call timer%start("mpi_communication")
call mpi_recv(d(noff+nmid+1), nlen-nmid, M_MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr) call mpi_recv(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
call timer%stop("mpi_communication") call timer%stop("mpi_communication")
#else /* WITH_MPI */ #else /* WITH_MPI */
! d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) ! d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1)
...@@ -316,22 +316,22 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -316,22 +316,22 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
! Last merge, result distribution must be block cyclic, noff==0, ! Last merge, result distribution must be block cyclic, noff==0,
! p_col_bc is set so that only nev eigenvalues are calculated ! p_col_bc is set so that only nev eigenvalues are calculated
call M_merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, & call merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col, p_col, & nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col, p_col, &
l_col_bc, p_col_bc, np_off, nprocs, wantDebug, success ) l_col_bc, p_col_bc, np_off, nprocs, wantDebug, success )
if (.not.(success)) return if (.not.(success)) return
else else
! Not last merge, leave dense column distribution ! Not last merge, leave dense column distribution
call M_merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, & call merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col(noff+1), p_col(noff+1), & nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col(noff+1), p_col(noff+1), &
l_col(noff+1), p_col(noff+1), np_off, nprocs, wantDebug, success ) l_col(noff+1), p_col(noff+1), np_off, nprocs, wantDebug, success )
if (.not.(success)) return if (.not.(success)) return
endif endif
end subroutine M_merge_recursive_PRECISION end subroutine merge_recursive_PRECISION
end subroutine M_solve_tridi_PRECISION end subroutine solve_tridi_PRECISION
subroutine M_solve_tridi_col_PRECISION( na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success ) subroutine solve_tridi_col_PRECISION( na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
! Solves the symmetric, tridiagonal eigenvalue problem on one processor column ! Solves the symmetric, tridiagonal eigenvalue problem on one processor column
! with the divide and conquer method. ! with the divide and conquer method.
...@@ -365,7 +365,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -365,7 +365,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
integer(kind=ik) :: istat integer(kind=ik) :: istat
character(200) :: errorMessage character(200) :: errorMessage
call timer%start("solve_tridi_col" // M_PRECISION_SUFFIX) call timer%start("solve_tridi_col" // PRECISION_SUFFIX)
call timer%start("mpi_communication") call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr) call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr) call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
...@@ -427,7 +427,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -427,7 +427,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
noff = limits(n) ! Start of subproblem noff = limits(n) ! Start of subproblem
nlen = limits(n+1)-noff ! Size of subproblem nlen = limits(n+1)-noff ! Size of subproblem
call M_solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1), & call solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1), &
q(nqoff+noff+1,noff+1),ubound(q,dim=1), wantDebug, success) q(nqoff+noff+1,noff+1),ubound(q,dim=1), wantDebug, success)
if (.not.(success)) return if (.not.(success)) return
...@@ -456,7 +456,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -456,7 +456,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
noff = limits(my_prow) ! Start of subproblem noff = limits(my_prow) ! Start of subproblem
nlen = limits(my_prow+1)-noff ! Size of subproblem nlen = limits(my_prow+1)-noff ! Size of subproblem
call M_solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1),qmat1, & call solve_tridi_single_problem_PRECISION(nlen,d(noff+1),e(noff+1),qmat1, &
ubound(qmat1,dim=1), wantDebug, success) ubound(qmat1,dim=1), wantDebug, success)
if (.not.(success)) return if (.not.(success)) return
...@@ -470,9 +470,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -470,9 +470,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
nlen = limits(np+1)-noff nlen = limits(np+1)-noff
#ifdef WITH_MPI #ifdef WITH_MPI
call timer%start("mpi_communication") call timer%start("mpi_communication")
call MPI_Bcast(d(noff+1), nlen, M_MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr) call MPI_Bcast(d(noff+1), nlen, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
qmat2 = qmat1 qmat2 = qmat1
call MPI_Bcast(qmat2, max_size*max_size, M_MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr) call MPI_Bcast(qmat2, max_size*max_size, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
call timer%stop("mpi_communication") call timer%stop("mpi_communication")
#else /* WITH_MPI */ #else /* WITH_MPI */
! qmat2 = qmat1 ! is this correct ! qmat2 = qmat1 ! is this correct
...@@ -480,9 +480,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -480,9 +480,9 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
do i=1,nlen do i=1,nlen
#ifdef WITH_MPI #ifdef WITH_MPI
call M_distribute_global_column_PRECISION(qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk) call distribute_global_column_PRECISION(qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
#else /* WITH_MPI */ #else /* WITH_MPI */
call M_distribute_global_column_PRECISION(qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk) call distribute_global_column_PRECISION(qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
#endif /* WITH_MPI */ #endif /* WITH_MPI */
enddo enddo
...@@ -525,7 +525,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -525,7 +525,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
! Last merge, set p_col_o=-1 for unneeded (output) eigenvectors ! Last merge, set p_col_o=-1 for unneeded (output) eigenvectors
p_col_o(nev+1:na) = -1 p_col_o(nev+1:na) = -1
endif endif
call M_merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, & call merge_systems_PRECISION(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_self, l_col(noff+1), p_col_i(noff+1), & matrixCols, mpi_comm_rows, mpi_comm_self, l_col(noff+1), p_col_i(noff+1), &
l_col(noff+1), p_col_o(noff+1), 0, 1, wantDebug, success) l_col(noff+1), p_col_o(noff+1), 0, 1, wantDebug, success)
if (.not.(success)) return if (.not.(success)) return
...@@ -542,11 +542,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -542,11 +542,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop stop
endif endif
call timer%stop("solve_tridi_col" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi_col" // PRECISION_SUFFIX)
end subroutine M_solve_tridi_col_PRECISION end subroutine solve_tridi_col_PRECISION
recursive subroutine M_solve_tridi_single_problem_PRECISION(nlen, d, e, q, ldq, wantDebug, success) recursive subroutine solve_tridi_single_problem_PRECISION(nlen, d, e, q, ldq, wantDebug, success)
! Solves the symmetric, tridiagonal eigenvalue problem on a single processor. ! Solves the symmetric, tridiagonal eigenvalue problem on a single processor.
! Takes precautions if DSTEDC fails or if the eigenvalues are not ordered correctly. ! Takes precautions if DSTEDC fails or if the eigenvalues are not ordered correctly.
...@@ -572,7 +572,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -572,7 +572,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
integer(kind=ik) :: istat integer(kind=ik) :: istat
character(200) :: errorMessage character(200) :: errorMessage
call timer%start("solve_tridi_single" // M_PRECISION_SUFFIX) call timer%start("solve_tridi_single" // PRECISION_SUFFIX)
success = .true. success = .true.
allocate(ds(nlen), es(nlen), stat=istat, errmsg=errorMessage) allocate(ds(nlen), es(nlen), stat=istat, errmsg=errorMessage)
...@@ -596,7 +596,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -596,7 +596,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
stop stop
endif endif
call M_PRECISION_STEDC('I', nlen, d, e, q, ldq, work, lwork, iwork, liwork, info) call PRECISION_STEDC('I', nlen, d, e, q, ldq, work, lwork, iwork, liwork, info)
if (info /= 0) then if (info /= 0) then
...@@ -606,7 +606,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -606,7 +606,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
d(:) = ds(:) d(:) = ds(:)
e(:) = es(:) e(:) = es(:)
call M_PRECISION_STEQR('I', nlen, d, e, q, ldq, work, info) call PRECISION_STEQR('I', nlen, d, e, q, ldq, work, info)
! If DSTEQR fails also, we don't know what to do further ... ! If DSTEQR fails also, we don't know what to do further ...
if (info /= 0) then if (info /= 0) then
...@@ -666,7 +666,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi ...@@ -666,7 +666,7 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
endif endif
enddo enddo
call timer%stop("solve_tridi_single" // M_PRECISION_SUFFIX) call timer%stop("solve_tridi_single" // PRECISION_SUFFIX)
end subroutine M_solve_tridi_single_problem_PRECISION end subroutine solve_tridi_single_problem_PRECISION
...@@ -54,16 +54,16 @@ ...@@ -54,16 +54,16 @@
#if REALCASE == 1 #if REALCASE == 1
subroutine M_v_add_s_PRECISION(v,n,s) subroutine v_add_s_PRECISION(v,n,s)
use precision use precision
implicit none implicit none
integer(kind=ik) :: n integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: v(n),s real(kind=REAL_DATATYPE) :: v(n),s
v(:) = v(:) + s v(:) = v(:) + s
end subroutine M_v_add_s_PRECISION end subroutine v_add_s_PRECISION
subroutine M_distribute_global_column_PRECISION(g_col, l_col, noff, nlen, my_prow, np_rows, nblk) subroutine distribute_global_column_PRECISION(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision use precision
implicit none implicit none
...@@ -88,9 +88,9 @@ ...@@ -88,9 +88,9 @@
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff) l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo enddo
end subroutine M_distribute_global_column_PRECISION end subroutine distribute_global_column_PRECISION
subroutine M_solve_secular_equation_PRECISION(n, i, d, z, delta, rho, dlam) subroutine solve_secular_equation_PRECISION(n, i, d, z, delta, rho, dlam)
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified ! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix: ! diagonal matrix:
...@@ -159,7 +159,7 @@ ...@@ -159,7 +159,7 @@