Commit 746911c9 authored by Andreas Marek
Browse files

Fix error due to lines with more than 134 characters

- also remove a typo in a variable name
parent 03ef1ee4
......@@ -422,11 +422,11 @@ contains
print *,"useGPU== ",useGPU
ttt0 = MPI_Wtime()
#ifdef DOUBLE_PRECISION_REAL
call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual)
call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, &
num_blocks, mpi_comm_rows, mpi_comm_cols, useGPU, useQRActual)
#else
call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual)
call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, &
num_blocks, mpi_comm_rows, mpi_comm_cols, useGPU, useQRActual)
#endif
ttt1 = MPI_Wtime()
......@@ -773,11 +773,11 @@ contains
print *,"useGPU== ",useGPU
ttt0 = MPI_Wtime()
#ifdef DOUBLE_PRECISION_REAL
call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual)
call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, &
matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, useGPU, useQRActual)
#else
call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual)
call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, &
matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, useGPU, useQRActual)
#endif
ttt1 = MPI_Wtime()
......
......@@ -4039,13 +4039,13 @@
if (useGPU) then
! An unpacking of the current row group may occur before queuing the next row
#ifdef DOUBLE_PRECISION_REAL
call unpack_and_prepare_row_group_real_gpu_double(row_group, row_group_dev, aIntern_dev, stripe_count, stripe_width, &
last_stripe_width, a_dim2, l_nev, row_group_size, nblk, &
unpack_idx, i - limits(my_prow), .false.)
call unpack_and_prepare_row_group_real_gpu_double(row_group, row_group_dev, aIntern_dev, stripe_count, &
stripe_width, last_stripe_width, a_dim2, l_nev, &
row_group_size, nblk, unpack_idx, i - limits(my_prow), .false.)
#else
call unpack_and_prepare_row_group_real_gpu_single(row_group, row_group_dev, aIntern_dev, stripe_count, stripe_width, &
last_stripe_width, a_dim2, l_nev, row_group_size, nblk, &
unpack_idx, i - limits(my_prow), .false.)
call unpack_and_prepare_row_group_real_gpu_single(row_group, row_group_dev, aIntern_dev, stripe_count, &
stripe_width, last_stripe_width, a_dim2, l_nev, &
row_group_size, nblk, unpack_idx, i - limits(my_prow), .false.)
#endif
#ifdef WITH_MPI
......@@ -4524,7 +4524,7 @@
n_off = current_local_n+a_off
b_len = csw*nbw
b_off = (my_thread-1)*b_len
aInten(1:csw,n_off+1:n_off+nbw,i,my_thread) = &
aIntern(1:csw,n_off+1:n_off+nbw,i,my_thread) = &
reshape(bottom_border_recv_buffer(b_off+1:b_off+b_len,i), (/ csw, nbw /))
enddo
!$omp end parallel do
......@@ -4679,15 +4679,17 @@
reshape(top_border_recv_buffer(b_off+1:b_off+b_len,i), (/ csw, top_msg_length /))
endif
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, current_local_n, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, current_local_n, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern,stripe_width,a_dim2,stripe_count, max_threads, l_nev, &
......@@ -4707,12 +4709,12 @@
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aInern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
......@@ -4816,17 +4818,17 @@
!$omp parallel do private(my_thread, b_len, b_off), schedule(static, 1)
do my_thread = 1, max_threads
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
current_local_n - bottom_msg_length, bottom_msg_length, i, my_thread, &
thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, current_local_n - bottom_msg_length, &
bottom_msg_length, i, my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
current_local_n - bottom_msg_length, bottom_msg_length, i, my_thread, &
thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, current_local_n - bottom_msg_length, &
bottom_msg_length, i, my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern, stripe_width,a_dim2,stripe_count, max_threads, l_nev, &
! a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
......@@ -4850,7 +4852,7 @@
n_off = current_local_n+nbw-bottom_msg_length+a_off
b_len = csw*bottom_msg_length*max_threads
bottom_border_send_buffer(1:b_len,i) = &
reshape(a(1:csw,n_off+1:n_off+bottom_msg_length,i,:), (/ b_len /))
reshape(aIntern(1:csw,n_off+1:n_off+bottom_msg_length,i,:), (/ b_len /))
#ifdef WITH_MPI
#ifdef DOUBLE_PRECISION_REAL
......@@ -4871,14 +4873,15 @@
endif
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
current_local_n - bottom_msg_length, bottom_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
current_local_n - bottom_msg_length, bottom_msg_length, i, &
......@@ -4947,17 +4950,19 @@
!$omp parallel do private(my_thread), schedule(static, 1)
do my_thread = 1, max_threads
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
top_msg_length, current_local_n-top_msg_length-bottom_msg_length, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, my_thread, &
thread_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
top_msg_length, current_local_n-top_msg_length-bottom_msg_length, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, my_thread, &
thread_width, THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern, stripe_width, a_dim2,stripe_count, max_threads, l_nev, &
! a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
......@@ -4976,13 +4981,13 @@
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, &
......@@ -5052,15 +5057,17 @@
reshape(top_border_recv_buffer(b_off+1:b_off+b_len,i), (/ csw, top_msg_length /))
endif
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
0, top_msg_length, i, my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, top_msg_length, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
0, top_msg_length, i, my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, top_msg_length, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern, stripe_width,a_dim2,stripe_count, max_threads, l_nev, &
! a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
......@@ -5078,12 +5085,12 @@
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, top_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, top_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
......@@ -5114,7 +5121,8 @@
#else /* WITH_MPI */
! careful: the "receive" has to be done at the corresponding wait or send
! top_border_recv_buffer(1:csw*next_top_msg_length*max_threads,i) = bottom_border_send_buffer(1:csw*next_top_msg_length*max_threads,i)
! top_border_recv_buffer(1:csw*next_top_msg_length*max_threads,i) = &
!bottom_border_send_buffer(1:csw*next_top_msg_length*max_threads,i)
#endif /* WITH_MPI */
#else /* WITH_OPENMP */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment