Commit 7a7d45e1 authored by Andreas Marek's avatar Andreas Marek

Explicitly do the conversion of data types where needed

parent 8c10c2dc
......@@ -338,8 +338,14 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a_mat
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a_mat
if (my_prow == prow(na, nblk, np_rows) .and. my_pcol == pcol(na, nblk, np_cols)) &
#if COMPLEXCASE == 1
d_vec(na) = real(a_mat(l_rows,l_cols), kind=rk)
#endif
#if REALCASE == 1
d_vec(na) = a_mat(l_rows,l_cols)
#endif
if (useGPU) then
! allocate memmory for matrix A on the device and than copy the matrix
......@@ -415,7 +421,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
#else /* WITH_MPI */
aux2 = aux1
#endif /* WITH_MPI */
#if REALCASE == 1
vnorm2 = aux2(1)
#endif
#if COMPLEXCASE == 1
vnorm2 = real(aux2(1),kind=rk)
#endif
vrl = aux2(2)
! Householder transformation
......@@ -434,7 +446,12 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
v_row(l_rows) = 1.
! vrl is newly computed off-diagonal element of the final tridiagonal matrix
#if REALCASE == 1
e_vec(istep-1) = vrl
#endif
#if COMPLEXCASE == 1
e_vec(istep-1) = real(vrl,kind=rk)
#endif
endif
! store Householder Vector for back transformation
......@@ -836,7 +853,12 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
a_mat(l_rows,l_cols) = a_mat(l_rows,l_cols) &
+ dot_product(vu_stored_rows(l_rows,1:2*n_stored_vecs),uv_stored_cols(l_cols,1:2*n_stored_vecs))
end if
#if REALCASE == 1
d_vec(istep-1) = a_mat(l_rows,l_cols)
#endif
#if COMPLEXCASE == 1
d_vec(istep-1) = real(a_mat(l_rows,l_cols),kind=rk)
#endif
if (useGPU) then
!a_dev(l_rows,l_cols) = a_mat(l_rows,l_cols)
......@@ -868,7 +890,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
call hh_transform_complex_&
&PRECISION &
(obj, vrl, 0.0_rk, xf, tau(2), wantDebug)
#if REALCASE == 1
e_vec(1) = vrl
#endif
#if COMPLEXCASE == 1
e_vec(1) = real(vrl,kind=rk)
#endif
a_mat(1,l_cols) = 1. ! for consistency only
endif
......
......@@ -312,7 +312,7 @@
mpi_comm_rows, mpi_comm_cols, blockheuristic)
#endif
work_size = dwork_size(1)
work_size = int(dwork_size(1))
allocate(work_blocked(work_size), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"bandred_real: error when allocating work_blocked "//errorMessage
......@@ -626,7 +626,12 @@
aux2 = aux1 ! this should be optimized
#endif
#if REALCASE == 1
vnorm2 = aux2(1)
#endif
#if COMPLEXCASE == 1
vnorm2 = real(aux2(1),kind=rk)
#endif
vrl = aux2(2)
! Householder transformation
......
......@@ -500,10 +500,23 @@
#if REALCASE == 1
endif
#endif
#if REALCASE == 1
d(istep) = ab(1,na_s-n_off)
e(istep) = ab(2,na_s-n_off)
#endif
#if COMPLEXCASE == 1
d(istep) = real(ab(1,na_s-n_off), kind=rk)
e(istep) = real(ab(2,na_s-n_off), kind=rk)
#endif
if (istep == na-1) then
#if REALCASE == 1
d(na) = ab(1,na_s+1-n_off)
#endif
#if COMPLEXCASE == 1
d(na) = real(ab(1,na_s+1-n_off),kind=rk)
#endif
e(na) = 0.0_rck
endif
else
......
......@@ -140,7 +140,7 @@
temptau_offset = 1
temptau_size = total_cols
broadcast_offset = temptau_offset + temptau_size
broadcast_size = dbroadcast_size(1) + dtmat_bcast_size(1)
broadcast_size = int(dbroadcast_size(1) + dtmat_bcast_size(1))
work_offset = broadcast_offset + broadcast_size
if (lwork .eq. -1) then
......@@ -239,7 +239,7 @@
&PRECISION &
(obj,v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,&
0,mpicomm_rows)
broadcast_size = dbroadcast_size(1)
broadcast_size = int(dbroadcast_size(1))
!if (mpirank_rows .eq. 0) then
! pack tmatrix into broadcast buffer and calculate new size
......@@ -250,7 +250,7 @@
call qr_pdgeqrf_pack_unpack_tmatrix_&
&PRECISION &
(obj,tau(offset),t(voffset,voffset),ldt,dtmat_bcast_size(1),-1,lcols,0)
broadcast_size = broadcast_size + dtmat_bcast_size(1)
broadcast_size = broadcast_size + int(dtmat_bcast_size(1))
!end if
! initiate broadcast (send part)
......@@ -276,7 +276,7 @@
call qr_pdgeqrf_pack_unpack_&
&PRECISION &
(obj,v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,1,mpicomm_rows)
broadcast_size = dbroadcast_size(1)
broadcast_size = int(dbroadcast_size(1))
call qr_pdgeqrf_pack_unpack_tmatrix_&
&PRECISION &
......@@ -309,8 +309,8 @@
mb,rowidx,idx,rev,1,mpicomm_rows)
! now send t matrix to other processes in our process column
broadcast_size = dbroadcast_size(1)
tmat_bcast_size = dtmat_bcast_size(1)
broadcast_size = int(dbroadcast_size(1))
tmat_bcast_size = int(dtmat_bcast_size(1))
! t matrix should now be available on all processes => unpack
call qr_pdgeqrf_pack_unpack_tmatrix_&
......@@ -1139,7 +1139,7 @@
call qr_pdlarfg2_1dcomm_seed_&
&PRECISION &
(obj,a,lda,dseedwork_size(1),-1,work,m,mb,idx,rev,mpicomm)
seedwork_size = dseedwork_size(1)
seedwork_size = int(dseedwork_size(1))
seed_size = seedwork_size
if (lwork .eq. -1) then
......@@ -1516,6 +1516,9 @@
if (mpirank .eq. mpirank_top) then
topidx = local_index(idx,mpirank_top,mpiprocs,nb,0)
top = 1+(topidx-1)*incx
else
top = -99
stop
end if
alpha = seed(id*5+1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment