Commit 7d5728cb by Andreas Marek

### Further unify REAL and COMPLEXCASE

parent d09941c3
 ... @@ -127,11 +127,18 @@ ... @@ -127,11 +127,18 @@ integer(kind=ik) :: max_stored_rows integer(kind=ik) :: max_stored_rows #if REALCASE == 1 #ifdef DOUBLE_PRECISION_REAL real(kind=rk8), parameter :: ZERO = 0.0_rk8, ONE = 1.0_rk8 #else real(kind=rk4), parameter :: ZERO = 0.0_rk4, ONE = 1.0_rk4 #endif #endif #if COMPLEXCASE == 1 #if COMPLEXCASE == 1 #ifdef DOUBLE_PRECISION_COMPLEX #ifdef DOUBLE_PRECISION_COMPLEX complex(kind=ck8), parameter :: CZERO = (0.0_rk8,0.0_rk8), CONE = (1.0_rk8,0.0_rk8) complex(kind=ck8), parameter :: ZERO = (0.0_rk8,0.0_rk8), ONE = (1.0_rk8,0.0_rk8) #else #else complex(kind=ck4), parameter :: CZERO = (0.0_rk4,0.0_rk4), CONE = (1.0_rk4,0.0_rk4) complex(kind=ck4), parameter :: ZERO = (0.0_rk4,0.0_rk4), ONE = (1.0_rk4,0.0_rk4) #endif #endif #endif #endif integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr ... @@ -224,7 +231,7 @@ ... @@ -224,7 +231,7 @@ #if COMPLEXCASE == 1 #if COMPLEXCASE == 1 ! In the complex case tau(2) /= 0 ! In the complex case tau(2) /= 0 if (my_prow == prow(1, nblk, np_rows)) then if (my_prow == prow(1, nblk, np_rows)) then q_mat(1,1:l_cols) = q_mat(1,1:l_cols)*(CONE-tau(2)) q_mat(1,1:l_cols) = q_mat(1,1:l_cols)*(ONE-tau(2)) endif endif #endif #endif ... @@ -332,13 +339,12 @@ ... @@ -332,13 +339,12 @@ call timer%start("blas") call timer%start("blas") if (l_rows>0) & if (l_rows>0) & #if REALCASE == 1 #if REALCASE == 1 call PRECISION_SYRK('U', 'T', nstor, l_rows, & call PRECISION_SYRK('U', 'T', & CONST_1_0, hvm, ubound(hvm,dim=1), & CONST_0_0, tmat, max_stored_rows) #endif #endif #if COMPLEXCASE == 1 #if COMPLEXCASE == 1 call PRECISION_HERK('U', 'C', nstor, l_rows, CONE, hvm, ubound(hvm,dim=1), CZERO, tmat, max_stored_rows) call PRECISION_HERK('U', 'C', & #endif #endif nstor, l_rows, ONE, hvm, ubound(hvm,dim=1), ZERO, tmat, max_stored_rows) call timer%stop("blas") call timer%stop("blas") nc = 0 nc = 0 do n = 1, nstor-1 do n = 1, nstor-1 ... @@ -368,16 +374,14 @@ ... @@ -368,16 +374,14 @@ do n = 1, nstor-1 do n = 1, nstor-1 call timer%start("blas") call timer%start("blas") #if REALCASE == 1 #if REALCASE == 1 call PRECISION_TRMV('L', 'T', 'N', n, & call PRECISION_TRMV('L', 'T', 'N', & tmat, max_stored_rows, & h2(nc+1), 1) #endif #endif #if COMPLEXCASE == 1 #if COMPLEXCASE == 1 call PRECISION_TRMV('L', 'C', 'N', n, & call PRECISION_TRMV('L', 'C', 'N', & tmat, max_stored_rows, & h2(nc+1),1) #endif #endif n, tmat, max_stored_rows, h2(nc+1), 1) call timer%stop("blas") call timer%stop("blas") tmat(n+1,1:n) = & tmat(n+1,1:n) = & #if REALCASE == 1 #if REALCASE == 1 -h2(nc+1:nc+n) & -h2(nc+1:nc+n) & ... @@ -421,32 +425,26 @@ ... @@ -421,32 +425,26 @@ if (useGPU) then if (useGPU) then call timer%start("cublas") call timer%start("cublas") #if REALCASE == 1 #if REALCASE == 1 call cublas_PRECISION_GEMM('T', 'N', nstor, l_cols, l_rows, & call cublas_PRECISION_GEMM('T', 'N', & CONST_1_0, hvm_dev, hvm_ubnd, & q_dev, ldq, & CONST_0_0, tmp_dev, nstor) #endif #endif #if COMPLEXCASE == 1 #if COMPLEXCASE == 1 call cublas_PRECISION_GEMM('C', 'N', nstor, l_cols, l_rows, & call cublas_PRECISION_GEMM('C', 'N', & CONE, hvm_dev, hvm_ubnd, & q_dev, ldq, & CZERO, tmp_dev, nstor) #endif #endif nstor, l_cols, l_rows, ONE, hvm_dev, hvm_ubnd, & q_dev, ldq, ZERO, tmp_dev, nstor) call timer%stop("cublas") call timer%stop("cublas") else ! useGPU else ! useGPU call timer%start("blas") call timer%start("blas") #if REALCASE == 1 #if REALCASE == 1 call PRECISION_GEMM('T', 'N', nstor, l_cols, l_rows, & call PRECISION_GEMM('T', 'N', & CONST_1_0, hvm, ubound(hvm,dim=1), & q_mat, ldq, & CONST_0_0, tmp1, nstor) #endif #endif #if COMPLEXCASE == 1 #if COMPLEXCASE == 1 call PRECISION_GEMM('C', 'N', nstor, l_cols, l_rows, & call PRECISION_GEMM('C', 'N', & CONE, hvm, ubound(hvm,dim=1), & q_mat, ldq, & CZERO, tmp1 ,nstor) #endif #endif nstor, l_cols, l_rows, ONE, hvm, ubound(hvm,dim=1), & q_mat, ldq, ZERO, tmp1, nstor) call timer%stop("blas") call timer%stop("blas") endif ! useGPU endif ! useGPU ... @@ -505,70 +503,31 @@ ... @@ -505,70 +503,31 @@ if (l_rows>0) then if (l_rows>0) then if (useGPU) then if (useGPU) then call timer%start("cublas") call timer%start("cublas") #if REALCASE == 1 call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', & call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & nstor, l_cols, ONE, tmat_dev, max_stored_rows, & CONST_1_0, tmat_dev, max_stored_rows, & tmp_dev, nstor) call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, & -CONST_1_0, hvm_dev, hvm_ubnd, & tmp_dev, nstor, & CONST_1_0, q_dev, ldq) #endif #if COMPLEXCASE == 1 call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & CONE, tmat_dev, max_stored_rows, & tmp_dev, nstor) tmp_dev, nstor) call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, & call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, & -CONE, hvm_dev, hvm_ubnd, & -ONE, hvm_dev, hvm_ubnd, tmp_dev, nstor, & tmp_dev, nstor, & ONE, q_dev, ldq) CONE, q_dev, ldq) #endif call timer%stop("cublas") call timer%stop("cublas") else !useGPU else !useGPU #ifdef WITH_MPI #ifdef WITH_MPI ! tmp2 = tmat * tmp2 ! tmp2 = tmat * tmp2 call timer%start("blas") call timer%start("blas") #if REALCASE ==1 call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & CONST_1_0, tmat, max_stored_rows, & ONE, tmat, max_stored_rows, tmp2, nstor) tmp2, nstor) !q_mat = q_mat - hvm*tmp2 !q_mat = q_mat - hvm*tmp2 call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & -CONST_1_0, hvm, ubound(hvm,dim=1), & -ONE, hvm, ubound(hvm,dim=1), tmp2, nstor, ONE, q_mat, ldq) tmp2, nstor, & CONST_1_0, q_mat, ldq) #endif #if COMPLEXCASE == 1 call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & CONE, tmat, max_stored_rows, & tmp2, nstor) !q_mat = q_mat - hvm*tmp2 call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & -CONE, hvm, ubound(hvm,dim=1), & tmp2, nstor, & CONE, q_mat, ldq) #endif call timer%stop("blas") call timer%stop("blas") #else /* WITH_MPI */ #else /* WITH_MPI */ call timer%start("blas") call timer%start("blas") #if REALCASE == 1 call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & CONST_1_0, tmat, max_stored_rows, & tmp1, nstor) call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & -CONST_1_0, hvm, ubound(hvm,dim=1), & tmp1, nstor, & CONST_1_0, q_mat, ldq) #endif #if COMPLEXCASE == 1 call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, & CONE, tmat, max_stored_rows, & ONE, tmat, max_stored_rows, tmp1, nstor) tmp1, nstor) call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, & -CONE, hvm, ubound(hvm,dim=1), & -ONE, hvm, ubound(hvm,dim=1), tmp1, nstor, ONE, q_mat, ldq) tmp1, nstor, & CONE, q_mat, ldq) #endif call timer%stop("blas") call timer%stop("blas") #endif /* WITH_MPI */ #endif /* WITH_MPI */ endif ! useGPU endif ! useGPU ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!