Commit 830b54aa authored by Pavel Kus's avatar Pavel Kus

renaming matrix a to a_mat

parent 68e4bfa8
......@@ -64,7 +64,7 @@
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, a, a_dev, lda, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols, tmat, &
(obj, na, a_mat, a_dev, lda, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols, tmat, &
tmat_dev, wantDebug, useGPU, success &
#if REALCASE == 1
, useQR)
......@@ -80,14 +80,14 @@
!
! na Order of matrix
!
! a(lda,matrixCols) Distributed matrix which should be reduced.
! a_mat(lda,matrixCols) Distributed matrix which should be reduced.
! Distribution is like in Scalapack.
! Opposed to Scalapack, a(:,:) must be set completely (upper and lower half)
! a(:,:) is overwritten on exit with the band and the Householder vectors
! Opposed to Scalapack, a_mat(:,:) must be set completely (upper and lower half)
! a_mat(:,:) is overwritten on exit with the band and the Householder vectors
! in the upper half.
!
! lda Leading dimension of a
! matrixCols local columns of matrix a
! lda Leading dimension of a_mat
! matrixCols local columns of matrix a_mat
!
! nblk blocksize of cyclic distribution, must be the same in both directions!
!
......@@ -116,9 +116,9 @@
integer(kind=ik) :: na, lda, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=rck) :: a(lda,*), tmat(nbw,nbw,*)
MATH_DATATYPE(kind=rck) :: a_mat(lda,*), tmat(nbw,nbw,*)
#else
MATH_DATATYPE(kind=rck) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks)
MATH_DATATYPE(kind=rck) :: a_mat(lda,matrixCols), tmat(nbw,nbw,numBlocks)
#endif
#if REALCASE == 1
......@@ -325,14 +325,14 @@
#ifdef USE_ASSUMED_SIZE_QR
call qr_pdgeqrf_2dcomm_&
&PRECISION&
&(obj, a, lda, matrixCols, vmrCPU, max(l_rows,1), vmrCols, tauvector(1), na, tmat(1,1,1), &
&(obj, a_mat, lda, matrixCols, vmrCPU, max(l_rows,1), vmrCols, tauvector(1), na, tmat(1,1,1), &
nbw, nbw, dwork_size, 1, -1, na, nbw, nblk, nblk, na, na, 1, 0, PQRPARAM(1:11), &
mpi_comm_rows, mpi_comm_cols, blockheuristic)
#else
call qr_pdgeqrf_2dcomm_&
&PRECISION&
&(obj, a(1:lda,1:matrixCols), matrixCols, lda, vmrCPU(1:max(l_rows,1),1:vmrCols), max(l_rows,1), &
&(obj, a_mat(1:lda,1:matrixCols), matrixCols, lda, vmrCPU(1:max(l_rows,1),1:vmrCols), max(l_rows,1), &
vmrCols, tauvector(1:na), na, tmat(1:nbw,1:nbw,1), nbw, &
nbw, dwork_size(1:1), 1, -1, na, nbw, nblk, nblk, na, na, 1, 0, PQRPARAM(1:11), &
mpi_comm_rows, mpi_comm_cols, blockheuristic)
......@@ -361,7 +361,7 @@
cur_l_rows = 0
cur_l_cols = 0
successCUDA = cuda_memcpy(a_dev, loc(a(1,1)), (lda)*(na_cols)* size_of_datatype, cudaMemcpyHostToDevice)
successCUDA = cuda_memcpy(a_dev, loc(a_mat(1,1)), (lda)*(na_cols)* size_of_datatype, cudaMemcpyHostToDevice)
if (.not.(successCUDA)) then
print *,"bandred_&
&MATH_DATATYPE&
......@@ -539,7 +539,7 @@
cur_pcol = pcol(istep*nbw+1, nblk, np_cols)
if (my_pcol == cur_pcol) then
successCUDA = cuda_memcpy2d(loc(a(1, lc_start)), &
successCUDA = cuda_memcpy2d(loc(a_mat(1, lc_start)), &
int((lda*size_of_datatype),kind=c_intptr_t), &
(a_dev + int( ( (lc_start-1) * lda*size_of_datatype),kind=c_intptr_t )), &
int(lda*size_of_datatype,kind=c_intptr_t), &
......@@ -568,7 +568,7 @@
#ifdef USE_ASSUMED_SIZE_QR
call qr_pdgeqrf_2dcomm_&
&PRECISION&
&(obj, a, lda, matrixCols, vmrCPU, max(l_rows,1), vmrCols, tauvector(1), &
&(obj, a_mat, lda, matrixCols, vmrCPU, max(l_rows,1), vmrCols, tauvector(1), &
na, tmat(1,1,istep), nbw, nbw, work_blocked, work_size, &
work_size, na, n_cols, nblk, nblk, &
istep*nbw+n_cols-nbw, istep*nbw+n_cols, 1,&
......@@ -578,7 +578,7 @@
#else
call qr_pdgeqrf_2dcomm_&
&PRECISION&
&(obj, a(1:lda,1:matrixCols), lda, matrixCols, vmrCPU(1:max(l_rows,1),1:vmrCols) , &
&(obj, a_mat(1:lda,1:matrixCols), lda, matrixCols, vmrCPU(1:max(l_rows,1),1:vmrCols) , &
max(l_rows,1), vmrCols, tauvector(1:na), na, &
tmat(1:nbw,1:nbw,istep), nbw, nbw, work_blocked(1:work_size), work_size, &
work_size, na, n_cols, nblk, nblk, &
......@@ -609,7 +609,7 @@
! Get Vector to be transformed; distribute last element and norm of
! remaining elements to all procs in current column
vr(1:lr) = a(1:lr,lch) ! Vector to be transformed
vr(1:lr) = a_mat(1:lr,lch) ! Vector to be transformed
if (my_prow==prow(nrow, nblk, np_rows)) then
aux1(1) = dot_product(vr(1:lr-1),vr(1:lr-1))
......@@ -647,11 +647,11 @@
vr(1:lr) = vr(1:lr) * xf
if (my_prow==prow(nrow, nblk, np_rows)) then
a(1:lr-1,lch) = vr(1:lr-1)
a(lr,lch) = vrl
a_mat(1:lr-1,lch) = vr(1:lr-1)
a_mat(lr,lch) = vrl
vr(lr) = 1.0_rck
else
a(1:lr,lch) = vr(1:lr)
a_mat(1:lr,lch) = vr(1:lr)
endif
endif
......@@ -693,7 +693,7 @@
lcx = local_index(istep*nbw+j, my_pcol, np_cols, nblk, 0)
if (lcx>0) then
nlc = nlc+1
aux1(nlc) = dot_product(vr(1:lr),a(1:lr,lcx))
aux1(nlc) = dot_product(vr(1:lr),a_mat(1:lr,lcx))
endif
enddo
......@@ -709,7 +709,7 @@
lcx = local_index(istep*nbw+j, my_pcol, np_cols, nblk, 0)
if (lcx>0) then
nlc = nlc+1
a(1:lr,lcx) = a(1:lr,lcx) - conjg(tau)*aux2(nlc)*vr(1:lr)
a_mat(1:lr,lcx) = a_mat(1:lr,lcx) - conjg(tau)*aux2(nlc)*vr(1:lr)
endif
enddo
......@@ -727,7 +727,7 @@
lcx = local_index(istep*nbw+j, my_pcol, np_cols, nblk, 0)
if (lcx>0) then
nlc = nlc+1
a(1:lr,lcx) = a(1:lr,lcx) - conjg(tau)*aux1(nlc)*vr(1:lr)
a_mat(1:lr,lcx) = a_mat(1:lr,lcx) - conjg(tau)*aux1(nlc)*vr(1:lr)
endif
enddo
......@@ -740,7 +740,7 @@
! lcx = local_index(istep*nbw+j, my_pcol, np_cols, nblk, 0)
! if (lcx>0) then
! nlc = nlc+1
! a(1:lr,lcx) = a(1:lr,lcx) - conjg(tau)*aux2(nlc)*vr(1:lr)
! a_mat(1:lr,lcx) = a_mat(1:lr,lcx) - conjg(tau)*aux2(nlc)*vr(1:lr)
! endif
! enddo
......@@ -763,7 +763,7 @@
if (lcx>0 ) then
mynlc = mynlc+1
if ( mod((j-1), omp_get_num_threads()) .eq. omp_get_thread_num() ) then
if (lr>0) aux1(mynlc) = dot_product(vr(1:lr),a(1:lr,lcx))
if (lr>0) aux1(mynlc) = dot_product(vr(1:lr),a_mat(1:lr,lcx))
endif
endif
enddo
......@@ -796,10 +796,10 @@
do pp = 1,transformChunkSize
if (pp + ii > lr) exit
#if REALCASE == 1
a(ii+pp,lcx) = a(ii+pp,lcx) - tau*aux2(mynlc)*vr(ii+pp)
a_mat(ii+pp,lcx) = a_mat(ii+pp,lcx) - tau*aux2(mynlc)*vr(ii+pp)
#endif
#if COMPLEXCASE == 1
a(ii+pp,lcx) = a(ii+pp,lcx) - conjg(tau)*aux2(mynlc)*vr(ii+pp)
a_mat(ii+pp,lcx) = a_mat(ii+pp,lcx) - conjg(tau)*aux2(mynlc)*vr(ii+pp)
#endif
enddo
enddo
......@@ -814,7 +814,7 @@
lcx = local_index(istep*nbw+j, my_pcol, np_cols, nblk, 0)
if (lcx>0) then
nlc = nlc+1
if (lr>0) aux1(nlc) = dot_product(vr(1:lr),a(1:lr,lcx))
if (lr>0) aux1(nlc) = dot_product(vr(1:lr),a_mat(1:lr,lcx))
endif
enddo
......@@ -835,10 +835,10 @@
if (lcx>0) then
nlc = nlc+1
#if REALCASE == 1
a(1:lr,lcx) = a(1:lr,lcx) - tau*aux2(nlc)*vr(1:lr)
a_mat(1:lr,lcx) = a_mat(1:lr,lcx) - tau*aux2(nlc)*vr(1:lr)
#endif
#if COMPLEXCASE == 1
a(1:lr,lcx) = a(1:lr,lcx) - conjg(tau)*aux2(nlc)*vr(1:lr)
a_mat(1:lr,lcx) = a_mat(1:lr,lcx) - conjg(tau)*aux2(nlc)*vr(1:lr)
#endif
endif
enddo
......@@ -851,7 +851,7 @@
if (my_pcol == cur_pcol) then
successCUDA = cuda_memcpy2d((a_dev+ &
int(((lc_start-1)*lda*size_of_datatype),kind=c_intptr_t)), &
int(lda*size_of_datatype,kind=c_intptr_t), loc(a(1,lc_start)), &
int(lda*size_of_datatype,kind=c_intptr_t), loc(a_mat(1,lc_start)), &
int(lda*size_of_datatype,kind=c_intptr_t), &
int(lr_end*size_of_datatype,kind=c_intptr_t), &
int((lc_end - lc_start+1),kind=c_intptr_t), &
......@@ -932,7 +932,7 @@
if (my_pcol == cur_pcol) then
successCUDA = cuda_memcpy2d((a_dev+ &
int(((lc_start-1)*lda*size_of_datatype),kind=c_intptr_t)), &
int(lda*size_of_datatype,kind=c_intptr_t), loc(a(1,lc_start)), &
int(lda*size_of_datatype,kind=c_intptr_t), loc(a_mat(1,lc_start)), &
int(lda*size_of_datatype,kind=c_intptr_t), &
int(lr_end*size_of_datatype,kind=c_intptr_t), &
int((lc_end - lc_start+1),kind=c_intptr_t), &
......@@ -989,14 +989,14 @@
lre = min(l_rows,(i+1)*l_rows_tile)
call obj%timer%start("blas")
call PRECISION_GEMM('C', 'N', lce-lcs+1, n_cols, lre, ONE, a(1,lcs), ubound(a,dim=1), &
call PRECISION_GEMM('C', 'N', lce-lcs+1, n_cols, lre, ONE, a_mat(1,lcs), ubound(a_mat,dim=1), &
vmrCPU, ubound(vmrCPU,dim=1), ONE, umcCPU(lcs,1), ubound(umcCPU,dim=1))
call obj%timer%stop("blas")
if (i==0) cycle
lre = min(l_rows,i*l_rows_tile)
call obj%timer%start("blas")
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a(1,lcs), lda, &
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a_mat(1,lcs), lda, &
umcCPU(lcs,n_cols+1), ubound(umcCPU,dim=1), ONE, vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1))
call obj%timer%stop("blas")
enddo
......@@ -1061,7 +1061,7 @@
if ( lre > lrs .and. l_cols > lcs ) then
call obj%timer%start("blas")
call PRECISION_GEMM('N', 'N', lre-lrs+1, n_cols, l_cols-lcs+1, &
ONE, a(lrs,lcs), ubound(a,dim=1), &
ONE, a_mat(lrs,lcs), ubound(a_mat,dim=1), &
umcCPU(lcs,n_cols+1), ubound(umcCPU,dim=1), &
ZERO, vmrCPU(lrs,n_cols+1), ubound(vmrCPU,dim=1))
call obj%timer%stop("blas")
......@@ -1072,7 +1072,7 @@
call obj%timer%start("blas")
call PRECISION_GEMM(BLAS_TRANS_OR_CONJ, 'N', &
lce-lcs+1, n_cols, lrs-1, &
ONE, a(1,lcs), ubound(a,dim=1), &
ONE, a_mat(1,lcs), ubound(a_mat,dim=1), &
vmrCPU(1,1), ubound(vmrCPU,dim=1), &
ZERO, umcCPU(lcs,1), ubound(umcCPU,dim=1))
call obj%timer%stop("blas")
......@@ -1152,13 +1152,13 @@
call obj%timer%start("blas")
call PRECISION_GEMM(BLAS_TRANS_OR_CONJ, 'N', &
lce-lcs+1, n_cols, lre, ONE, a(1,lcs), ubound(a,dim=1), &
lce-lcs+1, n_cols, lre, ONE, a_mat(1,lcs), ubound(a_mat,dim=1), &
vmrCPU, ubound(vmrCPU,dim=1), ONE, umcCPU(lcs,1), ubound(umcCPU,dim=1))
call obj%timer%stop("blas")
if (i==0) cycle
lre = min(l_rows,i*l_rows_tile)
call obj%timer%start("blas")
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a(1,lcs), lda, &
call PRECISION_GEMM('N', 'N', lre, n_cols, lce-lcs+1, ONE, a_mat(1,lcs), lda, &
umcCPU(lcs,n_cols+1), ubound(umcCPU,dim=1), ONE, &
vmrCPU(1,n_cols+1), ubound(vmrCPU,dim=1))
call obj%timer%stop("blas")
......@@ -1494,7 +1494,7 @@
call obj%timer%start("blas")
call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, myend-mystart+1, lce-lcs+1, 2*n_cols, -ONE, &
vmrCPU(mystart, 1), ubound(vmrCPU,1), umcCPU(lcs,1), ubound(umcCPU,1), &
ONE, a(mystart,lcs), ubound(a,1))
ONE, a_mat(mystart,lcs), ubound(a_mat,1))
call obj%timer%stop("blas")
enddo
!$omp end parallel
......@@ -1507,7 +1507,7 @@
! call obj%timer%start("blas")
! call PRECISION_GEMM('N', 'C', lre,lce-lcs+1, 2*n_cols, -ONE, &
! vmrCPU, ubound(vmrCPU,dim=1), umcCPU(lcs,1), ubound(umcCPU,dim=1), &
! ONE, a(1,lcs), lda)
! ONE, a_mat(1,lcs), lda)
! call obj%timer%stop("blas")
! enddo
!#endif
......@@ -1536,7 +1536,7 @@
call obj%timer%start("blas")
call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, lre,lce-lcs+1, 2*n_cols, -ONE, &
vmrCPU, ubound(vmrCPU,dim=1), umcCPU(lcs,1), ubound(umcCPU,dim=1), &
ONE, a(1,lcs), lda)
ONE, a_mat(1,lcs), lda)
call obj%timer%stop("blas")
endif ! useGPU
enddo ! i=0,(istep*nbw-1)/tile_size
......
......@@ -55,7 +55,7 @@
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nb, nblk, aMatrix, a_dev, lda, d, e, matrixCols, &
(obj, na, nb, nblk, a_mat, a_dev, lda, d, e, matrixCols, &
hh_trans, mpi_comm_rows, mpi_comm_cols, communicator, useGPU, wantDebug)
!-------------------------------------------------------------------------------
! tridiag_band_real/complex:
......@@ -67,7 +67,7 @@
!
! nblk blocksize of cyclic distribution, must be the same in both directions!
!
! aMatrix(lda,matrixCols) Distributed system matrix reduced to banded form in the upper diagonal
! a_mat(lda,matrixCols) Distributed system matrix reduced to banded form in the upper diagonal
!
! lda Leading dimension of a
! matrixCols local columns of matrix a
......@@ -95,9 +95,9 @@
logical, intent(in) :: useGPU, wantDebug
integer(kind=ik), intent(in) :: na, nb, nblk, lda, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=rck), intent(in) :: aMatrix(lda,*)
MATH_DATATYPE(kind=rck), intent(in) :: a_mat(lda,*)
#else
MATH_DATATYPE(kind=rck), intent(in) :: aMatrix(lda,matrixCols)
MATH_DATATYPE(kind=rck), intent(in) :: a_mat(lda,matrixCols)
#endif
integer(kind=c_intptr_t) :: a_dev
real(kind=rk), intent(out) :: d(na), e(na) ! set only on PE 0
......@@ -228,7 +228,7 @@
&MATH_DATATYPE&
&_&
&PRECISION&
&(obj,aMatrix, a_dev, lda, na, nblk, nb, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator, ab, useGPU)
&(obj,a_mat, a_dev, lda, na, nblk, nb, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator, ab, useGPU)
! Calculate the workload for each sweep in the back transformation
! and the space requirements to hold the HH vectors
......
......@@ -51,7 +51,7 @@ subroutine redist_band_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, a, a_dev, lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator, ab, useGPU)
(obj, a_mat, a_dev, lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator, ab, useGPU)
use elpa_abstract_impl
use elpa2_workload
......@@ -65,7 +65,7 @@ subroutine redist_band_&
class(elpa_abstract_impl_t), intent(inout) :: obj
logical, intent(in) :: useGPU
integer(kind=ik), intent(in) :: lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, communicator
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in) :: a(lda, matrixCols)
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in) :: a_mat(lda, matrixCols)
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(out) :: ab(:,:)
integer(kind=ik), allocatable :: ncnt_s(:), nstart_s(:), ncnt_r(:), nstart_r(:), &
......@@ -91,7 +91,7 @@ subroutine redist_band_&
if (useGPU) then
! copy a_dev to aMatrix
successCUDA = cuda_memcpy (loc(a), int(a_dev,kind=c_intptr_t), int(lda*matrixCols* size_of_datatype, kind=c_intptr_t), &
successCUDA = cuda_memcpy (loc(a_mat), int(a_dev,kind=c_intptr_t), int(lda*matrixCols* size_of_datatype, kind=c_intptr_t), &
cudaMemcpyDeviceToHost)
if (.not.(successCUDA)) then
print *,"redist_band_&
......@@ -175,8 +175,8 @@ subroutine redist_band_&
! Fill send buffer
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of a
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a_mat
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of a_mat
np = 0
do j=0,(na-1)/nblk ! loop over rows of blocks
......@@ -190,7 +190,7 @@ subroutine redist_band_&
jl = MIN(nblk,l_rows-js)
il = MIN(nblk,l_cols-is)
sbuf(1:jl,1:il,nstart_s(np)) = a(js+1:js+jl,is+1:is+il)
sbuf(1:jl,1:il,nstart_s(np)) = a_mat(js+1:js+jl,is+1:is+il)
endif
enddo
endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment