Commit 7a7d45e1 authored by Andreas Marek's avatar Andreas Marek

Explicitly do the conversion of data types where needed

parent 8c10c2dc
...@@ -338,8 +338,14 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -338,8 +338,14 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a_mat l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a_mat
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a_mat l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a_mat
if (my_prow == prow(na, nblk, np_rows) .and. my_pcol == pcol(na, nblk, np_cols)) & if (my_prow == prow(na, nblk, np_rows) .and. my_pcol == pcol(na, nblk, np_cols)) &
#if COMPLEXCASE == 1
d_vec(na) = real(a_mat(l_rows,l_cols), kind=rk)
#endif
#if REALCASE == 1
d_vec(na) = a_mat(l_rows,l_cols) d_vec(na) = a_mat(l_rows,l_cols)
#endif
if (useGPU) then if (useGPU) then
! allocate memmory for matrix A on the device and than copy the matrix ! allocate memmory for matrix A on the device and than copy the matrix
...@@ -415,7 +421,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -415,7 +421,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
#else /* WITH_MPI */ #else /* WITH_MPI */
aux2 = aux1 aux2 = aux1
#endif /* WITH_MPI */ #endif /* WITH_MPI */
#if REALCASE == 1
vnorm2 = aux2(1) vnorm2 = aux2(1)
#endif
#if COMPLEXCASE == 1
vnorm2 = real(aux2(1),kind=rk)
#endif
vrl = aux2(2) vrl = aux2(2)
! Householder transformation ! Householder transformation
...@@ -434,7 +446,12 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -434,7 +446,12 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
v_row(l_rows) = 1. v_row(l_rows) = 1.
! vrl is newly computed off-diagonal element of the final tridiagonal matrix ! vrl is newly computed off-diagonal element of the final tridiagonal matrix
#if REALCASE == 1
e_vec(istep-1) = vrl e_vec(istep-1) = vrl
#endif
#if COMPLEXCASE == 1
e_vec(istep-1) = real(vrl,kind=rk)
#endif
endif endif
! store Householder Vector for back transformation ! store Householder Vector for back transformation
...@@ -836,7 +853,12 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -836,7 +853,12 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
a_mat(l_rows,l_cols) = a_mat(l_rows,l_cols) & a_mat(l_rows,l_cols) = a_mat(l_rows,l_cols) &
+ dot_product(vu_stored_rows(l_rows,1:2*n_stored_vecs),uv_stored_cols(l_cols,1:2*n_stored_vecs)) + dot_product(vu_stored_rows(l_rows,1:2*n_stored_vecs),uv_stored_cols(l_cols,1:2*n_stored_vecs))
end if end if
#if REALCASE == 1
d_vec(istep-1) = a_mat(l_rows,l_cols) d_vec(istep-1) = a_mat(l_rows,l_cols)
#endif
#if COMPLEXCASE == 1
d_vec(istep-1) = real(a_mat(l_rows,l_cols),kind=rk)
#endif
if (useGPU) then if (useGPU) then
!a_dev(l_rows,l_cols) = a_mat(l_rows,l_cols) !a_dev(l_rows,l_cols) = a_mat(l_rows,l_cols)
...@@ -868,7 +890,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -868,7 +890,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
call hh_transform_complex_& call hh_transform_complex_&
&PRECISION & &PRECISION &
(obj, vrl, 0.0_rk, xf, tau(2), wantDebug) (obj, vrl, 0.0_rk, xf, tau(2), wantDebug)
#if REALCASE == 1
e_vec(1) = vrl e_vec(1) = vrl
#endif
#if COMPLEXCASE == 1
e_vec(1) = real(vrl,kind=rk)
#endif
a_mat(1,l_cols) = 1. ! for consistency only a_mat(1,l_cols) = 1. ! for consistency only
endif endif
......
...@@ -312,7 +312,7 @@ ...@@ -312,7 +312,7 @@
mpi_comm_rows, mpi_comm_cols, blockheuristic) mpi_comm_rows, mpi_comm_cols, blockheuristic)
#endif #endif
work_size = dwork_size(1) work_size = int(dwork_size(1))
allocate(work_blocked(work_size), stat=istat, errmsg=errorMessage) allocate(work_blocked(work_size), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then if (istat .ne. 0) then
print *,"bandred_real: error when allocating work_blocked "//errorMessage print *,"bandred_real: error when allocating work_blocked "//errorMessage
...@@ -626,7 +626,12 @@ ...@@ -626,7 +626,12 @@
aux2 = aux1 ! this should be optimized aux2 = aux1 ! this should be optimized
#endif #endif
#if REALCASE == 1
vnorm2 = aux2(1) vnorm2 = aux2(1)
#endif
#if COMPLEXCASE == 1
vnorm2 = real(aux2(1),kind=rk)
#endif
vrl = aux2(2) vrl = aux2(2)
! Householder transformation ! Householder transformation
......
...@@ -500,10 +500,23 @@ ...@@ -500,10 +500,23 @@
#if REALCASE == 1 #if REALCASE == 1
endif endif
#endif #endif
#if REALCASE == 1
d(istep) = ab(1,na_s-n_off) d(istep) = ab(1,na_s-n_off)
e(istep) = ab(2,na_s-n_off) e(istep) = ab(2,na_s-n_off)
#endif
#if COMPLEXCASE == 1
d(istep) = real(ab(1,na_s-n_off), kind=rk)
e(istep) = real(ab(2,na_s-n_off), kind=rk)
#endif
if (istep == na-1) then if (istep == na-1) then
#if REALCASE == 1
d(na) = ab(1,na_s+1-n_off) d(na) = ab(1,na_s+1-n_off)
#endif
#if COMPLEXCASE == 1
d(na) = real(ab(1,na_s+1-n_off),kind=rk)
#endif
e(na) = 0.0_rck e(na) = 0.0_rck
endif endif
else else
......
...@@ -140,7 +140,7 @@ ...@@ -140,7 +140,7 @@
temptau_offset = 1 temptau_offset = 1
temptau_size = total_cols temptau_size = total_cols
broadcast_offset = temptau_offset + temptau_size broadcast_offset = temptau_offset + temptau_size
broadcast_size = dbroadcast_size(1) + dtmat_bcast_size(1) broadcast_size = int(dbroadcast_size(1) + dtmat_bcast_size(1))
work_offset = broadcast_offset + broadcast_size work_offset = broadcast_offset + broadcast_size
if (lwork .eq. -1) then if (lwork .eq. -1) then
...@@ -239,7 +239,7 @@ ...@@ -239,7 +239,7 @@
&PRECISION & &PRECISION &
(obj,v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,& (obj,v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,&
0,mpicomm_rows) 0,mpicomm_rows)
broadcast_size = dbroadcast_size(1) broadcast_size = int(dbroadcast_size(1))
!if (mpirank_rows .eq. 0) then !if (mpirank_rows .eq. 0) then
! pack tmatrix into broadcast buffer and calculate new size ! pack tmatrix into broadcast buffer and calculate new size
...@@ -250,7 +250,7 @@ ...@@ -250,7 +250,7 @@
call qr_pdgeqrf_pack_unpack_tmatrix_& call qr_pdgeqrf_pack_unpack_tmatrix_&
&PRECISION & &PRECISION &
(obj,tau(offset),t(voffset,voffset),ldt,dtmat_bcast_size(1),-1,lcols,0) (obj,tau(offset),t(voffset,voffset),ldt,dtmat_bcast_size(1),-1,lcols,0)
broadcast_size = broadcast_size + dtmat_bcast_size(1) broadcast_size = broadcast_size + int(dtmat_bcast_size(1))
!end if !end if
! initiate broadcast (send part) ! initiate broadcast (send part)
...@@ -276,7 +276,7 @@ ...@@ -276,7 +276,7 @@
call qr_pdgeqrf_pack_unpack_& call qr_pdgeqrf_pack_unpack_&
&PRECISION & &PRECISION &
(obj,v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,1,mpicomm_rows) (obj,v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,1,mpicomm_rows)
broadcast_size = dbroadcast_size(1) broadcast_size = int(dbroadcast_size(1))
call qr_pdgeqrf_pack_unpack_tmatrix_& call qr_pdgeqrf_pack_unpack_tmatrix_&
&PRECISION & &PRECISION &
...@@ -309,8 +309,8 @@ ...@@ -309,8 +309,8 @@
mb,rowidx,idx,rev,1,mpicomm_rows) mb,rowidx,idx,rev,1,mpicomm_rows)
! now send t matrix to other processes in our process column ! now send t matrix to other processes in our process column
broadcast_size = dbroadcast_size(1) broadcast_size = int(dbroadcast_size(1))
tmat_bcast_size = dtmat_bcast_size(1) tmat_bcast_size = int(dtmat_bcast_size(1))
! t matrix should now be available on all processes => unpack ! t matrix should now be available on all processes => unpack
call qr_pdgeqrf_pack_unpack_tmatrix_& call qr_pdgeqrf_pack_unpack_tmatrix_&
...@@ -1139,7 +1139,7 @@ ...@@ -1139,7 +1139,7 @@
call qr_pdlarfg2_1dcomm_seed_& call qr_pdlarfg2_1dcomm_seed_&
&PRECISION & &PRECISION &
(obj,a,lda,dseedwork_size(1),-1,work,m,mb,idx,rev,mpicomm) (obj,a,lda,dseedwork_size(1),-1,work,m,mb,idx,rev,mpicomm)
seedwork_size = dseedwork_size(1) seedwork_size = int(dseedwork_size(1))
seed_size = seedwork_size seed_size = seedwork_size
if (lwork .eq. -1) then if (lwork .eq. -1) then
...@@ -1516,6 +1516,9 @@ ...@@ -1516,6 +1516,9 @@
if (mpirank .eq. mpirank_top) then if (mpirank .eq. mpirank_top) then
topidx = local_index(idx,mpirank_top,mpiprocs,nb,0) topidx = local_index(idx,mpirank_top,mpiprocs,nb,0)
top = 1+(topidx-1)*incx top = 1+(topidx-1)*incx
else
top = -99
stop
end if end if
alpha = seed(id*5+1) alpha = seed(id*5+1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment