Commit 7d5728cb authored by Andreas Marek's avatar Andreas Marek
Browse files

Further unify REAL and COMPLEXCASE

parent d09941c3
...@@ -127,11 +127,18 @@ ...@@ -127,11 +127,18 @@
integer(kind=ik) :: max_stored_rows integer(kind=ik) :: max_stored_rows
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
real(kind=rk8), parameter :: ZERO = 0.0_rk8, ONE = 1.0_rk8
#else
real(kind=rk4), parameter :: ZERO = 0.0_rk4, ONE = 1.0_rk4
#endif
#endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX #ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=ck8), parameter :: CZERO = (0.0_rk8,0.0_rk8), CONE = (1.0_rk8,0.0_rk8) complex(kind=ck8), parameter :: ZERO = (0.0_rk8,0.0_rk8), ONE = (1.0_rk8,0.0_rk8)
#else #else
complex(kind=ck4), parameter :: CZERO = (0.0_rk4,0.0_rk4), CONE = (1.0_rk4,0.0_rk4) complex(kind=ck4), parameter :: ZERO = (0.0_rk4,0.0_rk4), ONE = (1.0_rk4,0.0_rk4)
#endif #endif
#endif #endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
...@@ -224,7 +231,7 @@ ...@@ -224,7 +231,7 @@
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! In the complex case tau(2) /= 0 ! In the complex case tau(2) /= 0
if (my_prow == prow(1, nblk, np_rows)) then if (my_prow == prow(1, nblk, np_rows)) then
q_mat(1,1:l_cols) = q_mat(1,1:l_cols)*(CONE-tau(2)) q_mat(1,1:l_cols) = q_mat(1,1:l_cols)*(ONE-tau(2))
endif endif
#endif #endif
...@@ -332,13 +339,12 @@ ...@@ -332,13 +339,12 @@
call timer%start("blas") call timer%start("blas")
if (l_rows>0) & if (l_rows>0) &
#if REALCASE == 1 #if REALCASE == 1
call PRECISION_SYRK('U', 'T', nstor, l_rows, & call PRECISION_SYRK('U', 'T', &
CONST_1_0, hvm, ubound(hvm,dim=1), &
CONST_0_0, tmat, max_stored_rows)
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
call PRECISION_HERK('U', 'C', nstor, l_rows, CONE, hvm, ubound(hvm,dim=1), CZERO, tmat, max_stored_rows) call PRECISION_HERK('U', 'C', &
#endif #endif
nstor, l_rows, ONE, hvm, ubound(hvm,dim=1), ZERO, tmat, max_stored_rows)
call timer%stop("blas") call timer%stop("blas")
nc = 0 nc = 0
do n = 1, nstor-1 do n = 1, nstor-1
...@@ -368,16 +374,14 @@ ...@@ -368,16 +374,14 @@
do n = 1, nstor-1 do n = 1, nstor-1
call timer%start("blas") call timer%start("blas")
#if REALCASE == 1 #if REALCASE == 1
call PRECISION_TRMV('L', 'T', 'N', n, & call PRECISION_TRMV('L', 'T', 'N', &
tmat, max_stored_rows, &
h2(nc+1), 1)
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
call PRECISION_TRMV('L', 'C', 'N', n, & call PRECISION_TRMV('L', 'C', 'N', &
tmat, max_stored_rows, &
h2(nc+1),1)
#endif #endif
n, tmat, max_stored_rows, h2(nc+1), 1)
call timer%stop("blas") call timer%stop("blas")
tmat(n+1,1:n) = & tmat(n+1,1:n) = &
#if REALCASE == 1 #if REALCASE == 1
-h2(nc+1:nc+n) & -h2(nc+1:nc+n) &
...@@ -421,32 +425,26 @@ ...@@ -421,32 +425,26 @@
if (useGPU) then if (useGPU) then
call timer%start("cublas") call timer%start("cublas")
#if REALCASE == 1 #if REALCASE == 1
call cublas_PRECISION_GEMM('T', 'N', nstor, l_cols, l_rows, & call cublas_PRECISION_GEMM('T', 'N', &
CONST_1_0, hvm_dev, hvm_ubnd, &
q_dev, ldq, &
CONST_0_0, tmp_dev, nstor)
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
call cublas_PRECISION_GEMM('C', 'N', nstor, l_cols, l_rows, & call cublas_PRECISION_GEMM('C', 'N', &
CONE, hvm_dev, hvm_ubnd, &
q_dev, ldq, &
CZERO, tmp_dev, nstor)
#endif #endif
nstor, l_cols, l_rows, ONE, hvm_dev, hvm_ubnd, &
q_dev, ldq, ZERO, tmp_dev, nstor)
call timer%stop("cublas") call timer%stop("cublas")
else ! useGPU else ! useGPU
call timer%start("blas") call timer%start("blas")
#if REALCASE == 1 #if REALCASE == 1
call PRECISION_GEMM('T', 'N', nstor, l_cols, l_rows, & call PRECISION_GEMM('T', 'N', &
CONST_1_0, hvm, ubound(hvm,dim=1), &
q_mat, ldq, &
CONST_0_0, tmp1, nstor)
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
call PRECISION_GEMM('C', 'N', nstor, l_cols, l_rows, & call PRECISION_GEMM('C', 'N', &
CONE, hvm, ubound(hvm,dim=1), &
q_mat, ldq, &
CZERO, tmp1 ,nstor)
#endif #endif
nstor, l_cols, l_rows, ONE, hvm, ubound(hvm,dim=1), &
q_mat, ldq, ZERO, tmp1, nstor)
call timer%stop("blas") call timer%stop("blas")
endif ! useGPU endif ! useGPU
...@@ -505,70 +503,31 @@ ...@@ -505,70 +503,31 @@
if (l_rows>0) then if (l_rows>0) then
if (useGPU) then if (useGPU) then
call timer%start("cublas") call timer%start("cublas")
#if REALCASE == 1 call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', &
call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & nstor, l_cols, ONE, tmat_dev, max_stored_rows, &
CONST_1_0, tmat_dev, max_stored_rows, &
tmp_dev, nstor)
call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, &
-CONST_1_0, hvm_dev, hvm_ubnd, &
tmp_dev, nstor, &
CONST_1_0, q_dev, ldq)
#endif
#if COMPLEXCASE == 1
call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
CONE, tmat_dev, max_stored_rows, &
tmp_dev, nstor) tmp_dev, nstor)
call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, & call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, &
-CONE, hvm_dev, hvm_ubnd, & -ONE, hvm_dev, hvm_ubnd, tmp_dev, nstor, &
tmp_dev, nstor, & ONE, q_dev, ldq)
CONE, q_dev, ldq)
#endif
call timer%stop("cublas") call timer%stop("cublas")
else !useGPU else !useGPU
#ifdef WITH_MPI #ifdef WITH_MPI
! tmp2 = tmat * tmp2 ! tmp2 = tmat * tmp2
call timer%start("blas") call timer%start("blas")
#if REALCASE ==1
call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
CONST_1_0, tmat, max_stored_rows, & ONE, tmat, max_stored_rows, tmp2, nstor)
tmp2, nstor)
!q_mat = q_mat - hvm*tmp2 !q_mat = q_mat - hvm*tmp2
call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-CONST_1_0, hvm, ubound(hvm,dim=1), & -ONE, hvm, ubound(hvm,dim=1), tmp2, nstor, ONE, q_mat, ldq)
tmp2, nstor, &
CONST_1_0, q_mat, ldq)
#endif
#if COMPLEXCASE == 1
call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
CONE, tmat, max_stored_rows, &
tmp2, nstor)
!q_mat = q_mat - hvm*tmp2
call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-CONE, hvm, ubound(hvm,dim=1), &
tmp2, nstor, &
CONE, q_mat, ldq)
#endif
call timer%stop("blas") call timer%stop("blas")
#else /* WITH_MPI */ #else /* WITH_MPI */
call timer%start("blas") call timer%start("blas")
#if REALCASE == 1
call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
CONST_1_0, tmat, max_stored_rows, &
tmp1, nstor)
call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-CONST_1_0, hvm, ubound(hvm,dim=1), &
tmp1, nstor, &
CONST_1_0, q_mat, ldq)
#endif
#if COMPLEXCASE == 1
call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
CONE, tmat, max_stored_rows, & ONE, tmat, max_stored_rows, tmp1, nstor)
tmp1, nstor)
call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-CONE, hvm, ubound(hvm,dim=1), & -ONE, hvm, ubound(hvm,dim=1), tmp1, nstor, ONE, q_mat, ldq)
tmp1, nstor, &
CONE, q_mat, ldq)
#endif
call timer%stop("blas") call timer%stop("blas")
#endif /* WITH_MPI */ #endif /* WITH_MPI */
endif ! useGPU endif ! useGPU
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment