From ddba9c0f8921cc407095063d5923c49329e47bb8 Mon Sep 17 00:00:00 2001 From: Pavel Kus Date: Tue, 29 Aug 2017 14:10:05 +0200 Subject: [PATCH] real/complex unifications in initializations in elpa2_trans_ev_tridi_to_band_template --- .../elpa2_trans_ev_tridi_to_band_template.F90 | 94 +++---------------- 1 file changed, 14 insertions(+), 80 deletions(-) diff --git a/src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 b/src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 index 631da741..34549758 100644 --- a/src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 +++ b/src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 @@ -525,13 +525,7 @@ stop 1 endif -#if REALCASE == 1 - row_group(:, :) = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - row_group(:, :) = CONST_COMPLEX_0_0 -#endif - + row_group(:, :) = 0.0_rck num = (l_nev*nblk)* size_of_datatype successCUDA = cuda_malloc(row_group_dev, num) if (.not.(successCUDA)) then @@ -558,12 +552,7 @@ #ifdef WITH_OPENMP if (posix_memalign(aIntern_ptr, 64_c_intptr_t, stripe_width*a_dim2*stripe_count*max_threads* & -#if REALCASE == 1 C_SIZEOF(a_var)) /= 0) then -#endif -#if COMPLEXCASE == 1 - C_SIZEOF(a_var)) /= 0) then -#endif print *,"trans_ev_tridi_to_band_& &MATH_DATATYPE& &: error when allocating aIntern"//errorMessage @@ -578,12 +567,7 @@ #else /* WITH_OPENMP */ if (posix_memalign(aIntern_ptr, 64_c_intptr_t, stripe_width*a_dim2*stripe_count* & -#if REALCASE == 1 - C_SIZEOF(a_var)) /= 0) then -#endif -#if COMPLEXCASE == 1 C_SIZEOF(a_var)) /= 0) then -#endif print *,"trans_ev_tridi_to_band_real: error when allocating aIntern"//errorMessage stop 1 endif @@ -591,12 +575,7 @@ call c_f_pointer(aIntern_ptr, aIntern,[stripe_width,a_dim2,stripe_count] ) !allocate(aIntern(stripe_width,a_dim2,stripe_count), stat=istat, errmsg=errorMessage) -#if REALCASE == 1 - aIntern(:,:,:) = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - aIntern(:,:,:) = 0 -#endif + aIntern(:,:,:) = 0.0_rck #endif /* WITH_OPENMP */ endif !useGPU @@ -608,13 +587,7 @@ stop 1 endif -#if REALCASE == 1 - row(:) = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - row(:) = 0 -#endif - + row(:) = 0.0_rck ! Copy q from a block cyclic distribution into a distribution with contiguous rows, ! and transpose the matrix using stripes of given stripe_width for cache blocking. @@ -630,12 +603,7 @@ call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX) !$omp parallel do private(my_thread), schedule(static, 1) do my_thread = 1, max_threads -#if REALCASE == 1 - aIntern(:,:,:,my_thread) = CONST_0_0 ! if possible, do first touch allocation! -#endif -#if COMPLEXCASE == 1 - aIntern(:,:,:,my_thread) = CONST_COMPLEX_0_0 ! if possible, do first touch allocation! -#endif + aIntern(:,:,:,my_thread) = 0.0_rck ! if possible, do first touch allocation! enddo !$omp end parallel do @@ -759,14 +727,7 @@ row_group_size, nblk, unpack_idx, & i - limits(ip), .false.) -#if REALCASE == 1 - row_group(:, row_group_size) = q(src_offset, 1:l_nev) -#endif - -#if COMPLEXCASE == 1 row_group(:, row_group_size) = q(src_offset, 1:l_nev) -#endif - #else /* WITH_OPENMP */ #if COMPLEXCASE == 1 @@ -1126,18 +1087,10 @@ stop 1 endif -#if REALCASE == 1 - top_border_send_buffer(:,:) = CONST_0_0 - top_border_recv_buffer(:,:) = CONST_0_0 - bottom_border_send_buffer(:,:) = CONST_0_0 - bottom_border_recv_buffer(:,:) = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - top_border_send_buffer(:,:) = CONST_COMPLEX_0_0 - top_border_recv_buffer(:,:) = CONST_COMPLEX_0_0 - bottom_border_send_buffer(:,:) = CONST_COMPLEX_0_0 - bottom_border_recv_buffer(:,:) = CONST_COMPLEX_0_0 -#endif + top_border_send_buffer(:,:) = 0.0_rck + top_border_recv_buffer(:,:) = 0.0_rck + bottom_border_send_buffer(:,:) = 0.0_rck + bottom_border_recv_buffer(:,:) = 0.0_rck ! Initialize broadcast buffer #else /* WITH_OPENMP */ @@ -1174,19 +1127,10 @@ stop 1 endif -#if REALCASE == 1 - top_border_send_buffer(:,:,:) = CONST_0_0 - top_border_recv_buffer(:,:,:) = CONST_0_0 - bottom_border_send_buffer(:,:,:) = CONST_0_0 - bottom_border_recv_buffer(:,:,:) = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - top_border_send_buffer(:,:,:) = CONST_COMPLEX_0_0 - top_border_recv_buffer(:,:,:) = CONST_COMPLEX_0_0 - bottom_border_send_buffer(:,:,:) = CONST_COMPLEX_0_0 - bottom_border_recv_buffer(:,:,:) = CONST_COMPLEX_0_0 -#endif - + top_border_send_buffer(:,:,:) = 0.0_rck + top_border_recv_buffer(:,:,:) = 0.0_rck + bottom_border_send_buffer(:,:,:) = 0.0_rck + bottom_border_recv_buffer(:,:,:) = 0.0_rck #endif /* WITH_OPENMP */ ! Initialize broadcast buffer @@ -1199,12 +1143,7 @@ stop 1 endif -#if REALCASE == 1 - bcast_buffer = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - bcast_buffer = 0 -#endif + bcast_buffer = 0.0_rck if (useGPU) then num = ( nbw * max_blk_size) * size_of_datatype successCUDA = cuda_malloc(bcast_buffer_dev, num) @@ -1404,12 +1343,7 @@ else ! (current_local_n > 1) then ! for current_local_n == 1 the one and only HH Vector is 0 and not stored in hh_trans_real/complex -#if REALCASE == 1 - bcast_buffer(:,1) = CONST_0_0 -#endif -#if COMPLEXCASE == 1 - bcast_buffer(:,1) = CONST_COMPLEX_0_0 -#endif + bcast_buffer(:,1) = 0.0_rck if (useGPU) then successCUDA = cuda_memset(bcast_buffer_dev, 0, nbw * size_of_datatype) if (.not.(successCUDA)) then -- GitLab