Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
746911c9
Commit
746911c9
authored
Jun 14, 2016
by
Andreas Marek
Browse files
Fix error due to lines with more than 134 characters
- also fix a typo in a variable name
parent
03ef1ee4
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/elpa2.F90
View file @
746911c9
...
...
@@ -422,11 +422,11 @@ contains
print
*
,
"useGPU== "
,
useGPU
ttt0
=
MPI_Wtime
()
#ifdef DOUBLE_PRECISION_REAL
call
trans_ev_band_to_full_real_double
(
na
,
nev
,
nblk
,
nbw
,
a
,
a_dev
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
matrixCols
,
num_blocks
,
mpi_comm_rows
,
&
mpi_comm_cols
,
useGPU
,
useQRActual
)
call
trans_ev_band_to_full_real_double
(
na
,
nev
,
nblk
,
nbw
,
a
,
a_dev
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
matrixCols
,
&
num_blocks
,
mpi_comm_rows
,
mpi_comm_cols
,
useGPU
,
useQRActual
)
#else
call
trans_ev_band_to_full_real_single
(
na
,
nev
,
nblk
,
nbw
,
a
,
a_dev
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
matrixCols
,
num_blocks
,
mpi_comm_rows
,
&
mpi_comm_cols
,
useGPU
,
useQRActual
)
call
trans_ev_band_to_full_real_single
(
na
,
nev
,
nblk
,
nbw
,
a
,
a_dev
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
matrixCols
,
&
num_blocks
,
mpi_comm_rows
,
mpi_comm_cols
,
useGPU
,
useQRActual
)
#endif
ttt1
=
MPI_Wtime
()
...
...
@@ -773,11 +773,11 @@ contains
print
*
,
"useGPU== "
,
useGPU
ttt0
=
MPI_Wtime
()
#ifdef DOUBLE_PRECISION_REAL
call
trans_ev_band_to_full_real_double
(
na
,
nev
,
nblk
,
nbw
,
a
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
matrixCols
,
num_blocks
,
mpi_comm_rows
,
&
mpi_comm_cols
,
useGPU
,
useQRActual
)
call
trans_ev_band_to_full_real_double
(
na
,
nev
,
nblk
,
nbw
,
a
,
a_dev
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
&
matrixCols
,
num_blocks
,
mpi_comm_rows
,
mpi_comm_cols
,
useGPU
,
useQRActual
)
#else
call
trans_ev_band_to_full_real_single
(
na
,
nev
,
nblk
,
nbw
,
a
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
matrixCols
,
num_blocks
,
mpi_comm_rows
,
&
mpi_comm_cols
,
useGPU
,
useQRActual
)
call
trans_ev_band_to_full_real_single
(
na
,
nev
,
nblk
,
nbw
,
a
,
a_dev
,
lda
,
tmat
,
tmat_dev
,
q
,
q_dev
,
ldq
,
&
matrixCols
,
num_blocks
,
mpi_comm_rows
,
mpi_comm_cols
,
useGPU
,
useQRActual
)
#endif
ttt1
=
MPI_Wtime
()
...
...
src/elpa2_compute_real_template.X90
View file @
746911c9
...
...
@@ -4039,13 +4039,13 @@
if (useGPU) then
! An unpacking of the current row group may occur before queuing the next row
#ifdef DOUBLE_PRECISION_REAL
call unpack_and_prepare_row_group_real_gpu_double(row_group, row_group_dev, aIntern_dev, stripe_count,
stripe_width,
&
last_stripe_width, a_dim2, l_nev,
row_group_size, nblk,
&
unpack_idx, i - limits(my_prow), .false.)
call unpack_and_prepare_row_group_real_gpu_double(row_group, row_group_dev, aIntern_dev, stripe_count, &
stripe_width,
last_stripe_width, a_dim2, l_nev, &
row_group_size, nblk,
unpack_idx, i - limits(my_prow), .false.)
#else
call unpack_and_prepare_row_group_real_gpu_single(row_group, row_group_dev, aIntern_dev, stripe_count,
stripe_width,
&
last_stripe_width, a_dim2, l_nev,
row_group_size, nblk,
&
unpack_idx, i - limits(my_prow), .false.)
call unpack_and_prepare_row_group_real_gpu_single(row_group, row_group_dev, aIntern_dev, stripe_count, &
stripe_width,
last_stripe_width, a_dim2, l_nev, &
row_group_size, nblk,
unpack_idx, i - limits(my_prow), .false.)
#endif
#ifdef WITH_MPI
...
...
@@ -4524,7 +4524,7 @@
n_off = current_local_n+a_off
b_len = csw*nbw
b_off = (my_thread-1)*b_len
aInten(1:csw,n_off+1:n_off+nbw,i,my_thread) = &
aIntern(1:csw,n_off+1:n_off+nbw,i,my_thread) = &
reshape(bottom_border_recv_buffer(b_off+1:b_off+b_len,i), (/ csw, nbw /))
enddo
!$omp end parallel do
...
...
@@ -4679,15 +4679,17 @@
reshape(top_border_recv_buffer(b_off+1:b_off+b_len,i), (/ csw, top_msg_length /))
endif
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, current_local_n, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, current_local_n, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern,stripe_width,a_dim2,stripe_count, max_threads, l_nev, &
...
...
@@ -4707,12 +4709,12 @@
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aInern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, current_local_n, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
...
...
@@ -4816,17 +4818,17 @@
!$omp parallel do private(my_thread, b_len, b_off), schedule(static, 1)
do my_thread = 1, max_threads
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
max_threads, l_nev,
&
a_off, nbw, max_blk_size, bcast_buffer,
bcast_buffer_dev, hh_dot_dev,
&
hh_tau_dev, kernel_flops, kernel_time,
&
current_local_n - bottom_msg_length,
bottom_msg_length, i, my_thread,
&
thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev,
a_off, nbw, max_blk_size, bcast_buffer,
&
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops,
&
kernel_time,
current_local_n - bottom_msg_length,
&
bottom_msg_length, i, my_thread,
thread_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
max_threads, l_nev,
&
a_off, nbw, max_blk_size, bcast_buffer,
bcast_buffer_dev, hh_dot_dev,
&
hh_tau_dev, kernel_flops, kernel_time,
&
current_local_n - bottom_msg_length,
bottom_msg_length, i, my_thread,
&
thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev,
a_off, nbw, max_blk_size, bcast_buffer,
&
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops,
&
kernel_time,
current_local_n - bottom_msg_length,
&
bottom_msg_length, i, my_thread,
thread_width, THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern, stripe_width,a_dim2,stripe_count, max_threads, l_nev, &
! a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
...
...
@@ -4850,7 +4852,7 @@
n_off = current_local_n+nbw-bottom_msg_length+a_off
b_len = csw*bottom_msg_length*max_threads
bottom_border_send_buffer(1:b_len,i) = &
reshape(a(1:csw,n_off+1:n_off+bottom_msg_length,i,:), (/ b_len /))
reshape(aIntern(1:csw,n_off+1:n_off+bottom_msg_length,i,:), (/ b_len /))
#ifdef WITH_MPI
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -4871,14 +4873,15 @@
endif
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
current_local_n - bottom_msg_length, bottom_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
current_local_n - bottom_msg_length, bottom_msg_length, i, &
...
...
@@ -4947,17 +4950,19 @@
!$omp parallel do private(my_thread), schedule(static, 1)
do my_thread = 1, max_threads
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
top_msg_length, current_local_n-top_msg_length-bottom_msg_length, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, my_thread, &
thread_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
top_msg_length, current_local_n-top_msg_length-bottom_msg_length, i, &
my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width ,a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, my_thread, &
thread_width, THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern, stripe_width, a_dim2,stripe_count, max_threads, l_nev, &
! a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
...
...
@@ -4976,13 +4981,13 @@
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, top_msg_length, &
current_local_n-top_msg_length-bottom_msg_length, i, &
...
...
@@ -5052,15 +5057,17 @@
reshape(top_border_recv_buffer(b_off+1:b_off+b_len,i), (/ csw, top_msg_length /))
endif
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
0, top_msg_length, i, my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, top_msg_length, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, &
0, top_msg_length, i, my_thread, thread_width, THIS_REAL_ELPA_KERNEL)
call compute_hh_trafo_real_cpu_openmp_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
max_threads, l_nev, a_off, nbw, max_blk_size, bcast_buffer, &
bcast_buffer_dev, hh_dot_dev, hh_tau_dev, kernel_flops, &
kernel_time, 0, top_msg_length, i, my_thread, thread_width, &
THIS_REAL_ELPA_KERNEL)
#endif
! call compute_hh_trafo_real_cpu_openmp(aIntern, stripe_width,a_dim2,stripe_count, max_threads, l_nev, &
! a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
...
...
@@ -5078,12 +5085,12 @@
#else /* WITH_OPENMP */
#ifdef DOUBLE_PRECISION_REAL
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_double(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, top_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
#else
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count,
&
call compute_hh_trafo_real_cpu_single(aIntern, aIntern_dev, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, hh_dot_dev, &
hh_tau_dev, kernel_flops, kernel_time, 0, top_msg_length, i, &
last_stripe_width, THIS_REAL_ELPA_KERNEL)
...
...
@@ -5114,7 +5121,8 @@
#else /* WITH_MPI */
! careful: the "receive" has to be done at the corresponding wait or send
! top_border_recv_buffer(1:csw*next_top_msg_length*max_threads,i) = bottom_border_send_buffer(1:csw*next_top_msg_length*max_threads,i)
! top_border_recv_buffer(1:csw*next_top_msg_length*max_threads,i) = &
!bottom_border_send_buffer(1:csw*next_top_msg_length*max_threads,i)
#endif /* WITH_MPI */
#else /* WITH_OPENMP */
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment